Question

在做情感分析时，我用下面的代码统计文本中有多少个单词出现在正面词数组（p_Words）中，以及有多少个单词出现在负面词数组（n_Words）中。有没有办法以更低的时间复杂度实现同样的功能？

我认为下面的代码是 O(N * M)：indexOf 本身是 O(N)，而我又要遍历整个字符串中的所有单词，即 O(M)。

// Tallies how many tokens of `content` appear in each word list.
// s_Words, p_Words and n_Words are arrays defined outside this snippet.
var content = 'I am happy, this is such a beautiful morning';

// Splitting on a single space keeps punctuation attached to tokens ("happy,").
var words = content.split(" ");
var size = words.length;

var sCount = 0;
var pCount = 0;
var nCount = 0;

// Every token costs up to three linear indexOf scans over the word lists.
var j = 0;
while (j < size) {
    if (s_Words.indexOf(words[j]) !== -1) {
        // A token found in s_Words is never checked against the other lists.
        sCount += 1;
    } else {
        pCount += (p_Words.indexOf(words[j]) !== -1) ? 1 : 0;
        nCount += (n_Words.indexOf(words[j]) !== -1) ? 1 : 0;
    }
    j += 1;
}

console.log(pCount);
console.log(nCount);
console.log(size);

Answer 1

您可以把每个数组转换成一个正则表达式，然后统计匹配结果的数量：

// Demo word lists and the sample sentence they are counted against.
var s_Words = ['I', 'is', 'am', 'this', 'a'];
var p_Words = ['happy', 'beautiful', 'morning'];
var n_Words = ['such'];

var content = 'I am happy, this is such a beautiful morning';

/**
 * Counts the case-insensitive, whole-word occurrences in `str` of any word
 * listed in `arr`.
 *
 * Each word is escaped before being placed in the pattern, so regex
 * metacharacters in a word are matched literally instead of being
 * interpreted (or making the RegExp constructor throw).
 *
 * Note: matches are delimited with `\b`, so a word that starts or ends with
 * a non-word character (e.g. "c++") may not match next to whitespace.
 *
 * @param {string[]} arr - Words to look for; empty entries are ignored.
 * @param {string} str - Text to search.
 * @returns {number} Number of matches; 0 when `arr` has no usable words.
 */
function count(arr, str) {
  var alternatives = (arr || [])
    .filter(function(word) {
      return word != null && word !== '';
    })
    .map(function(word) {
      return String(word).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    });

  // An empty alternation would match the empty string at every word
  // boundary and report a bogus non-zero count.
  if (alternatives.length === 0) {
    return 0;
  }

  var pattern = new RegExp('\\b(?:' + alternatives.join('|') + ')\\b', 'ig');

  // String.prototype.match returns null (not []) when nothing matches.
  return (str.match(pattern) || []).length;
}

console.log('s_Words: ', count(s_Words, content));
console.log('p_Words: ', count(p_Words, content));
console.log('n_Words: ', count(n_Words, content));

另一种做法是先建立一个从单词到其类别的字典，然后只需遍历一次字符串中的单词，就能在 O(n) 时间内得到各类别的计数：

// Word lists grouped by category key.
var x_words = {
  s: ['I', 'is', 'am', 'this', 'a'],
  p: ['happy', 'beautiful', 'morning'],
  n: ['such']
};

/**
 * Builds a word -> category lookup from an object of category -> word list.
 * The lookup has no prototype, so words such as "constructor" cannot hit
 * inherited properties. Words are stored as-is, so lookups are case-sensitive.
 *
 * @param {Object.<string, string[]>} groups - Word lists keyed by category.
 * @returns {Object.<string, string>} Lookup from word to its category key.
 */
function buildDict(groups) {
  return Object.keys(groups).reduce(function(r, key) {
    groups[key].forEach(function(w) {
      r[w] = key;
    });

    return r;
  }, Object.create(null));
}

/**
 * Splits text into runs of word characters (\w+).
 *
 * @param {string} text - Text to split.
 * @returns {string[]} The tokens; an empty array when the text has none
 *   (String.prototype.match returns null in that case).
 */
function tokenize(text) {
  return text.match(/\w+/g) || [];
}

/**
 * Counts how many tokens fall into each category of the lookup.
 * Tokens that are not in the lookup are skipped, so they are not tallied
 * under an "undefined" key.
 *
 * @param {Object.<string, string>} lookup - Word -> category lookup.
 * @param {string[]} tokens - Words to classify.
 * @returns {Object.<string, number>} Prototype-less object of category counts.
 */
function countByType(lookup, tokens) {
  return tokens.reduce(function(r, w) {
    if (!Object.prototype.hasOwnProperty.call(lookup, w)) {
      return r;
    }

    var key = lookup[w];
    r[key] = (r[key] || 0) + 1;

    return r;
  }, Object.create(null));
}

var dict = buildDict(x_words);

var content = 'I am happy, this is such a beautiful morning';
var words = tokenize(content);

var counts = countByType(dict, words);

console.log(counts);

为 Sentiment Analysis 优化 JavaScript 时间复杂度（循环 + indexOf）

1 个答案: