在做情感分析时,我使用下面的代码来统计文本中有多少单词出现在正面单词数组(p_Words)中,以及有多少单词出现在(n_Words)数组中。有没有办法以更低的时间复杂度实现同样的功能?
我认为下面的代码是 O(N * M),因为 indexOf 是 O(N),而我还要循环遍历整个字符串,这是 O(M)。
// Tallies how many space-separated tokens of `content` occur in each of the
// word lists s_Words, p_Words and n_Words (arrays expected to be defined
// elsewhere). A token found in s_Words is counted only there; any other token
// is checked against both p_Words and n_Words.
var content = 'I am happy, this is such a beautiful morning';
var words = content.split(" "); // punctuation stays attached, e.g. "happy,"
var size = words.length;
var sCount = 0;
var pCount = 0;
var nCount = 0;

words.forEach(function(word) {
  // Tokens listed in s_Words are never tested against the other two lists.
  if (s_Words.indexOf(word) !== -1) {
    sCount += 1;
    return;
  }
  if (p_Words.indexOf(word) !== -1) {
    pCount += 1;
  }
  if (n_Words.indexOf(word) !== -1) {
    nCount += 1;
  }
});

// Report the p/n tallies followed by the total number of tokens.
console.log(pCount);
console.log(nCount);
console.log(size);
答案 0 :(得分:0)
您可以将每个数组转换为正则表达式,并计算匹配结果:
var s_Words = ['I', 'is', 'am', 'this', 'a'];
var p_Words = ['happy', 'beautiful', 'morning'];
var n_Words = ['such'];
var content = 'I am happy, this is such a beautiful morning';

/**
 * Counts the whole-word, case-insensitive occurrences in `str` of any of the
 * words listed in `arr`.
 *
 * @param {Array} arr - Words to look for; each entry is treated as literal
 *   text, and empty entries are ignored.
 * @param {string} str - Text to search.
 * @returns {number} Number of matches; 0 when `arr` has no usable words.
 */
function count(arr, str) {
  var alternatives = arr
    .map(function(word) {
      // Escape regex metacharacters so a word can only match itself.
      return String(word).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    })
    .filter(function(escaped) {
      return escaped !== '';
    });

  // An empty alternation would match the empty string at every word
  // boundary and produce a bogus non-zero count.
  if (alternatives.length === 0) {
    return 0;
  }

  var pattern = new RegExp('\\b(?:' + alternatives.join('|') + ')\\b', 'ig');
  // String#match returns null (not an empty array) when nothing matches.
  return (str.match(pattern) || []).length;
}

console.log('s_Words: ', count(s_Words, content));
console.log('p_Words: ', count(p_Words, content));
console.log('n_Words: ', count(n_Words, content));
另一种做法是创建一个把每个单词映射到其类型的字典,然后只需以 O(n) 的时间遍历一次字符串,即可得到各类型的计数:
// Word lists keyed by a one-letter type code (p = positive; n and s are
// presumably negative and neutral/stop words -- confirm with the author).
var x_words = {
  s: ['I', 'is', 'am', 'this', 'a'],
  p: ['happy', 'beautiful', 'morning'],
  n: ['such']
};

// Invert the lists into a prototype-less word -> type lookup so that each
// word can be classified with a single property access.
var dict = Object.keys(x_words).reduce(function(r, key) {
  x_words[key].forEach(function(w) {
    r[w] = key;
  });
  return r;
}, Object.create(null));

/**
 * Splits text into runs of word characters (\w+).
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} The tokens; an empty array when the text has none.
 */
function splitWords(text) {
  // String#match returns null when nothing matches, so fall back to [].
  return String(text).match(/\w+/g) || [];
}

/**
 * Tallies tokens per type according to a word -> type lookup.
 *
 * @param {string[]} tokens - Words to classify (matched case-sensitively).
 * @param {Object} lookup - Maps a word to its type key.
 * @returns {Object} Prototype-less object mapping type key -> count; words
 *   missing from `lookup` are ignored.
 */
function countWordTypes(tokens, lookup) {
  return tokens.reduce(function(r, w) {
    // Skip unknown words instead of tallying them under an "undefined" key.
    if (Object.prototype.hasOwnProperty.call(lookup, w)) {
      var key = lookup[w];
      r[key] = (r[key] || 0) + 1;
    }
    return r;
  }, Object.create(null));
}

var content = 'I am happy, this is such a beautiful morning';
var words = splitWords(content);
var counts = countWordTypes(words, dict);
console.log(counts);