为情感分析优化 JavaScript 时间复杂度(循环 + indexOf)

时间:2018-01-14 23:36:04

标签: javascript arrays node.js performance time-complexity

在做情感分析时,我用下面的代码统计文本中有多少个单词出现在正面单词数组(p_Words)中,以及有多少个单词出现在(n_Words)中。有没有办法以更低的时间复杂度实现相同的功能?

我认为下面的代码是 O(N * M),因为 indexOf 是 O(N),而我又循环遍历了整个字符串中的单词,即 O(M)。

// Sample sentence to classify. s_Words, p_Words and n_Words are word-list
// arrays that must already be defined before this snippet runs.
var content = 'I am happy, this is such a beautiful morning';

// Splitting on a single space keeps punctuation attached to tokens
// (for example 'happy,'), exactly as typed in the sentence.
var words = content.split(" ");
var size = words.length;

// One counter per word list.
var sCount = 0;
var pCount = 0;
var nCount = 0;

words.forEach(function (word) {
    // A token found in s_Words is counted there and nowhere else.
    if (s_Words.indexOf(word) !== -1) {
        sCount += 1;
        return;
    }

    // Otherwise it may be counted in p_Words, n_Words, both, or neither.
    if (p_Words.indexOf(word) !== -1) {
        pCount += 1;
    }
    if (n_Words.indexOf(word) !== -1) {
        nCount += 1;
    }
});

console.log(pCount);
console.log(nCount);
console.log(size);

1 个答案:

答案 0 :(得分:0)

您可以将每个数组转换为正则表达式,并计算匹配结果:

&#13;
&#13;
// Sample word lists and the sentence to scan.
var s_Words = ['I', 'is', 'am', 'this', 'a'];
var p_Words = ['happy', 'beautiful', 'morning'];
var n_Words = ['such'];

var content = 'I am happy, this is such a beautiful morning';

/**
 * Count the whole-word, case-insensitive occurrences in `str` of any word
 * listed in `arr`.
 *
 * @param {string[]} arr - Words to look for; each is matched literally.
 * @param {string} str - Text to scan.
 * @returns {number} Number of matches; 0 when `arr` has no usable words.
 */
function count(arr, str) {
  var alternatives = arr
    .filter(function(w) {
      // An empty alternative would match the empty string at every word
      // boundary and inflate the count.
      return String(w) !== '';
    })
    .map(function(w) {
      // Escape regex metacharacters so words such as 'a.b' match literally.
      return String(w).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    });

  // With nothing to search for, /\b()\b/ would still match at every word
  // boundary, so answer 0 directly.
  if (alternatives.length === 0) {
    return 0;
  }

  var pattern = new RegExp('\\b(' + alternatives.join('|') + ')\\b', 'ig');

  // match() returns null when nothing matches.
  return (str.match(pattern) || []).length;
}

console.log('s_Words: ', count(s_Words, content));
console.log('p_Words: ', count(p_Words, content));
console.log('n_Words: ', count(n_Words, content));
&#13;
&#13;
&#13;

另一种选择是创建一个把每个单词映射到其类型的字典,这样只需以 O(n) 的时间遍历一次字符串中的单词,即可得到各类型的计数:

&#13;
&#13;
// Word lists keyed by type code ('s', 'p', 'n').
var x_words = {
  s: ['I', 'is', 'am', 'this', 'a'],
  p: ['happy', 'beautiful', 'morning'],
  n: ['such']
};

// Invert x_words into a word -> type lookup. The prototype-less object
// ensures words like 'constructor' never hit inherited properties.
var dict = Object.keys(x_words).reduce(function(r, key) {
  x_words[key].forEach(function(w) {
    r[w] = key;
  });

  return r;
}, Object.create(null));

/**
 * Tally how many of the given words belong to each type in `dict`.
 * Lookup is case-sensitive; words not present in `dict` are skipped.
 *
 * @param {string[]} words - Tokens to classify.
 * @returns {Object} Prototype-less object mapping type key to its count;
 *   types with no matching word are absent.
 */
function tallyWords(words) {
  return words.reduce(function(r, w) {
    var key = dict[w];

    // Skip unknown words so they are not counted under an "undefined" key.
    if (key !== undefined) {
      r[key] = (r[key] || 0) + 1;
    }

    return r;
  }, Object.create(null));
}

var content = 'I am happy, this is such a beautiful morning';
// match() returns null when the text has no word characters.
var words = content.match(/\w+/g) || [];

var counts = tallyWords(words);

console.log(counts);
&#13;
&#13;
&#13;