计算文本中单词的频率

时间:2013-06-22 04:03:03

标签: c++ string counting

我写了一个函数来计算文本中特定单词的频率。这个程序每次都返回零。我怎么能改进它?

// Reads the stream cfPtr one line at a time into `sentence`; fgets returns
// NULL at end of input or on error, which ends the loop.
while (fgets(sentence, sizeof sentence, cfPtr))
{
// For each of the total4 slots: store the count returned for this line and
// add it to the running total all_frequency.
for(j=0;j<total4;j++)
        {
            // NOTE(review): the arguments do not depend on j, so every
            // frequency[j] receives the same value, and the assignment
            // overwrites whatever the previous line stored there.
            // NOTE(review): w's declaration is not shown here; comparision
            // takes a `char *`, so confirm that &w really has that type.
            frequency[j] = comparision(sentence,&w);
            all_frequency+=frequency[j];
}}
.
.
.
// Meant to count how often the word `w` occurs in the line `sentence`
// (that is the goal stated in the question) and return that count.
// As written it cannot do so; see the NOTE(review) comments.
int comparision(const char sentence[ ],char *w)
{  
    // NOTE(review): `i` has no initializer, so the first read of
    // sentence[i] uses an indeterminate index.
    int length=0,count=0,l=0,i;
    // NOTE(review): `length` is assigned here but never read afterwards.
    length= strlen(sentence);
    l= strlen(w);
    // NOTE(review): without braces the loop body is only the `if` statement;
    // the `i++` further down runs once after the loop, so `i` never advances
    // while the loop is running.
    while(sentence[i]!= '\n')
    // NOTE(review): strncmp returns 0 when the first l characters are equal,
    // so this condition is true on a MISMATCH. It also always compares from
    // the start of `sentence`, never from position i.
    if(strncmp(sentence,w,l))
        count++;
    i++;
    return count;
    }

1 个答案:

答案 0 :(得分:2)

我已校对了您的代码,并对编码风格和变量命名作了注释。其中仍有一个缺陷,我保留在了条件判断处:代码没有逐个位置遍历句子。

以下是您的代码:

// Reads the stream cfPtr one line at a time into `sentence`; fgets returns
// NULL at end of input or on error, which ends the loop.
while(fgets(sentence, sizeof sentence, cfPtr)) {
    for(j=0;j<total4;j++){
        // Call the function under the spelling it is defined with in this
        // answer (`comparison`); the old spelling `comparision` has no
        // definition here and would fail to link.
        // NOTE(review): the arguments do not depend on j, so every
        // frequency[j] receives the same value for a given line.
        // NOTE(review): w's declaration is not shown; comparison takes a
        // `char *`, so confirm that &w really has that type.
        frequency[j] = comparison(sentence,&w);
        all_frequency+=frequency[j];
    }

}

// int comparision(const char sentence[ ],char *w)  w is a poor variable name in this case.

// Counts how many times `word` occurs in one line of text.
//
// sentence: NUL-terminated line, e.g. as filled in by fgets. Scanning stops
//           at the first '\n' or at the terminating '\0', whichever comes
//           first, so a final line without a trailing newline is handled.
// word:     NUL-terminated text to look for.
//
// Returns the number of positions in the line at which `word` begins.
// Matches are plain substring matches: they may overlap ("aa" occurs three
// times in "aaaa") and may sit inside a longer word ("the" in "there").
// An empty `word` yields 0.
int comparison(const char sentence[ ], char *word)
{
    size_t length = strlen(word);
    int count = 0;
    size_t i;

    // An empty word would compare equal at every position; report no matches.
    if (length == 0)
        return 0;

    for (i = 0; sentence[i] != '\0' && sentence[i] != '\n'; i++) {
        // Compare at the current position (sentence + i), not at the start of
        // the line. strncmp returns 0 when the first `length` characters are
        // equal, and it stops at a '\0', so it never reads past the line end.
        if (strncmp(sentence + i, word, length) == 0)
            count++;
    }
    return count;
}