如何实现javascript函数来计算给定句子中每个单词的频率。
这是我的代码:
function search () {
var data = document.getElementById('txt').value;
var temp = data;
var words = new Array();
words = temp.split(" ");
var uniqueWords = new Array();
var count = new Array();
for (var i = 0; i < words.length; i++) {
//var count=0;
var f = 0;
for (j = 0; j < uniqueWords.length; j++) {
if (words[i] == uniqueWords[j]) {
count[j] = count[j] + 1;
//uniqueWords[j]=words[i];
f = 1;
}
}
if (f == 0) {
count[i] = 1;
uniqueWords[i] = words[i];
}
console.log("count of " + uniqueWords[i] + " - " + count[i]);
}
}
我无法追查问题..任何帮助都非常有用。 以这种格式输出: 计数是-1 计数 - 2 ..
输入:这是anil是kum the anil
答案 0 :(得分:15)
我认为你有多个数组,字符串,并且在循环和嵌套循环之间频繁(并且难以跟踪)上下文切换,你会感到过于复杂。
以下是我鼓励您考虑采取的方法。我已经通过内联评论来解释每一步。如果其中任何一项不清楚,请在评论中告诉我,我将重新审视以提高清晰度。
(function () {
/* Below is a regular expression that finds alphanumeric characters
Next is a string that could easily be replaced with a reference to a form control
Lastly, we have an array that will hold any words matching our pattern */
var pattern = /\w+/g,
string = "I I am am am yes yes.",
matchedWords = string.match( pattern );
/* The Array.prototype.reduce method assists us in producing a single value from an
array. In this case, we're going to use it to output an object with results. */
var counts = matchedWords.reduce(function ( stats, word ) {
/* `stats` is the object that we'll be building up over time.
`word` is each individual entry in the `matchedWords` array */
if ( stats.hasOwnProperty( word ) ) {
/* `stats` already has an entry for the current `word`.
As a result, let's increment the count for that `word`. */
stats[ word ] = stats[ word ] + 1;
} else {
/* `stats` does not yet have an entry for the current `word`.
As a result, let's add a new entry, and set count to 1. */
stats[ word ] = 1;
}
/* Because we are building up `stats` over numerous iterations,
we need to return it for the next pass to modify it. */
return stats;
}, {} );
/* Now that `counts` has our object, we can log it. */
console.log( counts );
}());
答案 1 :(得分:11)
这是一个JavaScript函数,用于获取句子中每个单词的频率:
function wordFreq(string) {
var words = string.replace(/[.]/g, '').split(/\s/);
var freqMap = {};
words.forEach(function(w) {
if (!freqMap[w]) {
freqMap[w] = 0;
}
freqMap[w] += 1;
});
return freqMap;
}
它将返回单词到字数的哈希值。例如,如果我们这样运行它:
console.log(wordFreq("I am the big the big bull."));
> Object {I: 1, am: 1, the: 2, big: 2, bull: 1}
您可以使用Object.keys(result).sort().forEach(result) {...}
迭代这些字词。所以我们可以像这样挂钩:
var freq = wordFreq("I am the big the big bull.");
Object.keys(freq).sort().forEach(function(word) {
console.log("count of " + word + " is " + freq[word]);
});
哪个会输出:
count of I is 1
count of am is 1
count of big is 2
count of bull is 1
count of the is 2
JSFiddle:http://jsfiddle.net/ah6wsbs6/
这是ES6中的wordFreq
函数:
function wordFreq(string) {
return string.replace(/[.]/g, '')
.split(/\s/)
.reduce((map, word) =>
Object.assign(map, {
[word]: (map[word])
? map[word] + 1
: 1,
}),
{}
);
}
JSFiddle:http://jsfiddle.net/r1Lo79us/
答案 2 :(得分:1)
以下是您自己代码的更新版本......
<!DOCTYPE html>
<html>
<head>
<title>string frequency</title>
<style type="text/css">
#text{
width:250px;
}
</style>
</head>
<body >
<textarea id="txt" cols="25" rows="3" placeholder="add your text here"> </textarea></br>
<button type="button" onclick="search()">search</button>
<script >
function search()
{
var data=document.getElementById('txt').value;
var temp=data;
var words=new Array();
words=temp.split(" ");
var unique = {};
for (var i = 0; i < words.length; i++) {
var word = words[i];
console.log(word);
if (word in unique)
{
console.log("word found");
var count = unique[word];
count ++;
unique[word]=count;
}
else
{
console.log("word NOT found");
unique[word]=1;
}
}
console.log(unique);
}
</script>
</body>
我认为你的循环过于复杂。此外,尝试在仍然对单词数组进行第一次传递时产生最终计数肯定会失败,因为在检查数组中的每个单词之前,您无法测试唯一性。
我使用Javascript对象作为关联数组,而不是所有计数器,因此我们可以存储每个唯一的单词,以及它出现次数的计数。
然后,一旦我们退出循环,我们就可以看到最终结果。
此外,此解决方案不使用正则表达式;)
我还要补充说,根据空格计算单词非常困难。在此代码中,&#34;一,二,一&#34;将导致&#34; one,&#34;和&#34;一个&#34;作为不同的,独特的单词。
答案 3 :(得分:1)
虽然这里的两个答案都是正确的,但也许更好,但没有一个解决OP的问题(他的代码有什么问题)。
OP代码的问题在于:
if(f==0){
count[i]=1;
uniqueWords[i]=words[i];
}
在每个新单词(唯一单词)上,代码会将该单词添加到uniqueWords
,该单词位于words
中。因此uniqueWords
数组存在空白。这是一些undefined
值的原因。
尝试打印uniqueWords
。它应该给出类似的东西:
[&#34;这&#34;,&#34;是&#34;,&#34; anil&#34;,4:&#34; kum&#34;,5:&#34;&# 34]
请注意索引3没有元素。
最后计数的打印应该在处理words
数组中的所有单词之后。
此处有更正版本:
function search()
{
var data=document.getElementById('txt').value;
var temp=data;
var words=new Array();
words=temp.split(" ");
var uniqueWords=new Array();
var count=new Array();
for (var i = 0; i < words.length; i++) {
//var count=0;
var f=0;
for(j=0;j<uniqueWords.length;j++){
if(words[i]==uniqueWords[j]){
count[j]=count[j]+1;
//uniqueWords[j]=words[i];
f=1;
}
}
if(f==0){
count[i]=1;
uniqueWords[i]=words[i];
}
}
for ( i = 0; i < uniqueWords.length; i++) {
if (typeof uniqueWords[i] !== 'undefined')
console.log("count of "+uniqueWords[i]+" - "+count[i]);
}
}
我刚刚将处理循环中的计数打印移动到一个新循环并添加了if not undefined
检查。
答案 4 :(得分:0)
对于slightly better efficiency,我将使用Sampson的match-reduce方法。这是它的修改后的版本,可以立即投入生产。它不是完美的,但它应该涵盖绝大多数情况(即“足够好”)。
function calcWordFreq(s) {
// Normalize
s = s.toLowerCase();
// Strip quotes and brackets
s = s.replace(/["“”(\[{}\])]|\B['‘]([^'’]+)['’]/g, '$1');
// Strip dashes and ellipses
s = s.replace(/[‒–—―…]|--|\.\.\./g, ' ');
// Strip punctuation marks
s = s.replace(/[!?;:.,]\B/g, '');
return s.match(/\S+/g).reduce(function(oFreq, sWord) {
if (oFreq.hasOwnProperty(sWord)) ++oFreq[sWord];
else oFreq[sWord] = 1;
return oFreq;
}, {});
}
calcWordFreq('A ‘bad’, “BAD” wolf-man...a good ol\' spook -- I\'m frightened!')
返回
{
"a": 2
"bad": 2
"frightened": 1
"good": 1
"i'm": 1
"ol'": 1
"spook": 1
"wolf-man": 1
}
答案 5 :(得分:0)
const sentence = 'Hi my friend how are you my friend';
const countWords = (sentence) => {
const convertToObject = sentence.split(" ").map( (i, k) => {
return {
element: {
word: i,
nr: sentence.split(" ").filter(j => j === i).length + ' occurrence',
}
}
});
return Array.from(new Set(convertToObject.map(JSON.stringify))).map(JSON.parse)
};
console.log(countWords(sentence));