在数组中查找彼此相邻出现的单词

时间:2017-02-06 09:10:00

标签: javascript arrays sorting

我有大量的字符串(单词),我正在分析模式。

我想创建一个函数:

  • 找出哪些单词以特定顺序出现了不止一次
  • 对于单词按顺序出现的每个实例,将它们组合成一个数组元素。

示例

给出以下数组

let array = ["john", "smith", "says", "that", "a", "lock", "smith", "can", "open", "the", "lock", "unlike", "john", "smith"]

期望的结果:

["john smith", "says", "that", "a", "lock", "smith", "can", "open", "the", "lock", "unlike", "john smith"]

理想情况下,该函数不应只识别 2 个单词的组合(例如,还能识别 "white"、"house"、"press"、"secretary" 这一组合何时出现了不止一次)。

我实在想不出该用什么逻辑来实现。我也在 underscore.js 这类库中找过现成的解决方案,但没有找到。

3 个答案:

答案 0 :(得分:2)

建立一个“字典”,记录每个单词及其所有直接后继。然后遍历原始数组,对每个元素检查字典中记录的所有后继是否都相同;如果是,则把该单词与其后继合并,并跳过这个后继。



// Sample input from the question.
const arr = ["john", "smith", "says", "that", "a", "lock", "smith", "can", "open", "the", "lock", "unlike", "john", "smith"];

/**
 * Merges a word with the word that follows it whenever that word is followed
 * more than once in `arr` and always by the very same successor.
 *
 * The input array is not modified.
 *
 * @param {string[]} arr - Words in their original order.
 * @returns {string[]} A new array in which every qualifying pair is replaced
 *   by a single "word successor" element; all other words are copied as-is.
 */
function combineCommon(arr) {
  // word -> list of every word that directly follows it somewhere in `arr`.
  // A Map (rather than a plain object) keeps words such as "constructor"
  // from colliding with inherited object properties.
  const successors = new Map();
  for (let i = 0; i < arr.length - 1; i++) {
    const followers = successors.get(arr[i]);
    if (followers === undefined) {
      successors.set(arr[i], [arr[i + 1]]);
    } else {
      followers.push(arr[i + 1]);
    }
  }

  const res = [];
  for (let i = 0; i < arr.length; i++) {
    const word = arr[i];
    // A word that only ever occurs as the last element has no entry at all.
    const followers = successors.get(word);
    // The last element has nothing after it, so it can never start a pair.
    const hasNext = i + 1 < arr.length;
    const alwaysSameSuccessor =
      hasNext &&
      followers !== undefined &&
      followers.length > 1 &&
      followers.every((follower) => follower === followers[0]);

    if (alwaysSameSuccessor) {
      res.push(`${word} ${arr[i + 1]}`);
      i++; // The successor was consumed by the merge; do not emit it again.
    } else {
      res.push(word);
    }
  }
  return res;
}
console.log(combineCommon(arr));

答案 1 :(得分:1)

您可以先统计相邻单词对的出现次数,然后在重新组合结果时检查这些单词对。

// Sample input: "john smith" occurs twice, and so does "foo bar baz".
const array = ["john", "smith", "says", "that", "a", "lock", "foo", "bar", "baz", "smith", "can", "open", "foo", "bar", "baz", "the", "lock", "unlike", "john", "smith"];

// count["word next"] = how many times that adjacent pair occurs in `array`.
// Object.create(null) yields a prototype-less dictionary, so no key can
// collide with an inherited property.
const count = Object.create(null);
for (let i = 0; i < array.length - 1; i++) {
  const key = `${array[i]} ${array[i + 1]}`;
  count[key] = (count[key] || 0) + 1;
}

// Rebuild the list left to right, merging every pair that occurs more than
// once. A merged pair consumes its second word, so overlapping repeated pairs
// ("foo bar" / "bar baz") can no longer emit the shared word twice.
const result = [];
for (let i = 0; i < array.length; i++) {
  const hasNext = i + 1 < array.length;
  const key = hasNext ? `${array[i]} ${array[i + 1]}` : array[i];
  if (hasNext && count[key] > 1) {
    result.push(key);
    i++; // Skip the word that was just merged into the pair.
  } else {
    result.push(array[i]);
  }
}

console.log(result);
/* Allow .as-console-wrapper to be up to 100% tall (!important overrides competing rules) and set its top offset to 0. */
.as-console-wrapper { max-height: 100% !important; top: 0; }

答案 2 :(得分:0)

请检查一下。

// Sample input from the question.
const data = ["john", "smith", "says", "that", "a", "lock", "smith", "can", "open", "the", "lock", "unlike", "john", "smith"];

// Builds an unambiguous lookup key for the adjacent pair starting at `index`.
// JSON encoding keeps ("a b", "c") distinct from ("a", "b c").
const pairKey = (index) => JSON.stringify([data[index], data[index + 1]]);

// First pass: count how often every adjacent word pair occurs. A single Map
// replaces rescanning the whole array for each word.
const pairCounts = new Map();
for (let i = 0; i < data.length - 1; i++) {
  const key = pairKey(i);
  pairCounts.set(key, (pairCounts.get(key) || 0) + 1);
}

// Second pass: copy words to `result`, merging each pair seen more than once.
const result = [];
for (let i = 0; i < data.length; i++) {
  // The last word has no successor, so it can never start a pair.
  const hasNext = i + 1 < data.length;
  if (hasNext && pairCounts.get(pairKey(i)) > 1) {
    result.push(`${data[i]} ${data[i + 1]}`);
    i++; // Skip the second word of the pair so it is not emitted twice.
  } else {
    // No repeated pair starts here: pass the word through unchanged.
    result.push(data[i]);
  }
}

console.log(result);