查找重复的两个单词之间的序列-javascript

时间:2019-04-04 13:20:01

标签: javascript regex

我正在尝试获取水果的数量及其名称。首先,我想把字符串切分成若干子字符串,因为我知道每个句子应该在哪里开始和结束。获得子字符串后,就可以检查其中的数量和水果名称。我会有一个包含水果名称的数组,并且每个子字符串中只会出现一个数字。

// Question snippet: tries to pull a quantity and a fruit name out of each
// <start> ... <end> section of the sentence.
var fruits = ["apple","orange","kiwi","banana"];
var string = 'I want to <start> eat 6 slices of apples <end> in the morning and <start> 1 orange in the evening <end> and <start> 4 more slices of apple before bed <end>'
// NOTE(review): `.*` is greedy, so a single match spans from the first <start>
// to the last <end> rather than one match per section.
var pattern = /(?<=<start>\s).*(?=<end>)/g;
// exec() returns one match array (or null) per call, not a list of all matches.
var substrings = pattern.exec(string);
var fruit;
for(var i = 0; i < substrings.length; i++){
  for(var j = 0; j < fruits.length; j++){
      // NOTE(review): `substrings` is an array, and arrays have no match() method,
      // so this call throws. Also, /(fruits[j])/ is a literal pattern; it does not
      // interpolate the value of fruits[j].
      fruit = substrings.match(/(fruits[j])/);
   }
  // \d matches a single digit only.
  var number = substrings.match(/\d/);
}

我希望输出:10 个苹果,1 个橙子。

4 个答案:

答案 0 :(得分:1)

一个思路是用正则表达式匹配“(数字)任意内容(某种水果)”这样的模式,并确保使用全局标志,这样就可以对找到的每一处匹配进行循环。在正则表达式中使用捕获组,可以让求和变得很容易。

因此,一个简单的正则表达式是-> /(\d).*?(apple|orange|kiwi|banana)/g

还请注意 .*?,这被称为非贪婪(惰性)匹配;如果不使用它,正则会一直匹配下去,直到字符串中最后一个水果名为止。

在下面,我只是使用new RegExp构建上面的正则表达式。

然后,因为我使用了全局标志,所以能够遍历所有找到的匹配项并求和。

  

附注:这种做法不需要 <start> 和 <end> 标记,但如果确实需要,您可以自行添加。

const fruits = ["apple", "orange", "kiwi", "banana"];
const string = 'I want to <start> eat 6 slices of apples <end> in the morning and <start> 1 orange in the evening <end> and <start> 4 more slices of apple before bed <end>';

/**
 * Sum the quantity written before each known fruit name in a text.
 *
 * @param {string} text - Text to scan.
 * @param {string[]} names - Fruit names to recognise (inserted verbatim into a regex).
 * @returns {Object<string, number>} Total quantity per fruit name that was found.
 */
function sumFruitQuantities(text, names) {
  const sums = {}; // somewhere to store our sums
  // An empty alternation would match the empty string after every number.
  if (names.length === 0) {
    return sums;
  }
  // (\d+) rather than (\d): capture the whole number ("60", not just "6").
  // The lazy .*? stops at the nearest fruit name instead of the last one.
  const find = new RegExp(`(\\d+).*?(${names.join("|")})`, "g");
  let found;
  // With the g flag, exec() resumes after the previous match and returns null
  // once there are no more matches.
  while ((found = find.exec(text)) !== null) {
    const [, qty, fruit] = found;
    sums[fruit] = (sums[fruit] || 0) + Number.parseInt(qty, 10); // sum the results
  }
  return sums;
}

const sums = sumFruitQuantities(string, fruits);

console.log(sums);

答案 1 :(得分:1)

我对您的代码进行了一些编辑,看来很有效:

// Totals the quantity mentioned in each <start> ... <end> section, keyed by the
// fruit name found in that section.
const fruits = ["apple", "orange", "kiwi", "banana"];
const string = 'I want to <start> eat 6 slices of apples <end> in the morning and <start> 1 orange in the evening <end> and <start> 4 more slices of apple before bed <end>';
// Lazy .*? so each match ends at the nearest <end>, giving one match per section.
const pattern = /<start>\s*.*?<end>/g;
// String.prototype.match returns null (not an empty array) when nothing matches.
const substrings = string.match(pattern) || [];
const fruitsDict = {};
for (const segment of substrings) {
  // First run of digits in the section; sections without a number are skipped
  // instead of throwing on a null match.
  const digits = segment.match(/\d+/);
  if (digits === null) {
    continue;
  }
  // Declared locally: the earlier bare `num = ...` created an implicit global,
  // which is a ReferenceError in strict mode / ES modules.
  const num = Number.parseInt(digits[0], 10);
  for (const name of fruits) {
    if (segment.match(RegExp(name))) {
      fruitsDict[name] = (fruitsDict[name] || 0) + num;
    }
  }
}
console.log(fruitsDict);

答案 2 :(得分:0)

您的代码中有几个问题(修复方法见代码中的注释):

var fruits = ["apple","orange","kiwi","banana"];
var string = 'I want to <start> eat 6 slices of apples <end> in the morning and <start> 1 orange in the evening <end> and <start> 4 more slices of apple before bed <end>';
// The lazy .*? ends each match at the nearest <end>; a greedy .* would run from
// the first <start> all the way to the last <end>.
var pattern = /(?<=<start>\s).*?(?=<end>)/g;
var substrings;
var fruit;
var number;
// exec() on a global regex hands back one match per call and null once it is
// exhausted, so keep calling it until null comes back.
while ((substrings = pattern.exec(string)) !== null) {
  // Index 0 of the match array is the text found between the tags.
  const segment = substrings[0];
  for (const name of fruits) {
    // A pattern built from a variable has to go through the RegExp constructor.
    fruit = segment.match(new RegExp('(' + name + ')'));
    // Stop at the first hit so a later miss does not overwrite it with null.
    if (fruit) {
      break;
    }
  }
  // \d+ takes the whole number, so 10 or 100 are matched in full.
  number = segment.match(/\d+/);
  console.log({ number: number, fruit: fruit });
}

// Fruit names to recognise.
var fruits = ["apple", "orange", "kiwi", "banana"];
// Join the names into one alternation (apple|orange|kiwi|banana) so a single
// regex finds whichever fruit appears.
var fruits_re = new RegExp(fruits.join("|"));

您还可以通过为水果创建正则表达式来简化代码

var fruits = ["apple","orange","kiwi","banana"];
// One alternation regex covering every fruit name replaces the per-fruit loop.
var fruits_re = new RegExp(fruits.join('|'));
var string = 'I want to <start> eat 6 slices of apples <end> in the morning and <start> 1 orange in the evening <end> and <start> 4 more slices of apple before bed <end>';
// Lazy match of the text that sits between "<start> " and "<end>".
var pattern = /(?<=<start>\s).*?(?=<end>)/g;
var substrings;
var fruit;
var number;
// Step through the matches one exec() call at a time; exec() returns null when
// the global regex has no further match.
for (substrings = pattern.exec(string); substrings; substrings = pattern.exec(string)) {
  const segment = substrings[0];
  fruit = segment.match(fruits_re);
  number = segment.match(/\d+/);
  console.log({ number: number, fruit: fruit });
}

答案 3 :(得分:0)

我的建议是:

  • 匹配所有模式
  • 对于每个匹配项,更新结果对象

const fruits = ["apple", "orange", "kiwi", "banana"];
const string = 'I want to <start> eat 60 slices of apples <end> in the morning and <start> 1 orange in the evening <end> and <start> 4 more slices of apple before bed <end>';

/**
 * Tally the quantity found in each <start> ... <end> section of a text, keyed
 * by the first known fruit name that the section contains.
 *
 * @param {string} text - Text containing <start> ... <end> sections.
 * @param {string[]} names - Fruit names to recognise.
 * @returns {Object<string, number>} Total quantity per fruit name.
 */
function tallyFruits(text, names) {
  const totals = {};
  // match() returns null (not an empty array) when there is no section at all.
  const segments = text.match(/<start>.[^<>]*<end>/g) || [];
  segments.forEach((segment) => {
    const name = names.find((candidate) => segment.includes(candidate));
    // Take the first number only; stripping every non-digit would glue separate
    // numbers in one section together (e.g. "6 ... 2" would become 62).
    const digits = segment.match(/\d+/);
    // Skip sections with no known fruit or no number; otherwise the tally would
    // gain an "undefined" key.
    if (name === undefined || digits === null) {
      return;
    }
    totals[name] = (totals[name] || 0) + Number.parseInt(digits[0], 10);
  });
  return totals;
}

const result = tallyFruits(string, fruits);
console.log(result);