C++ 单词计数器

时间:2014-05-14 15:30:31

标签: c++ word counter

如何将这段 C++ 代码中的“char word[50]”改为“string word[50]”?我想统计文本中每个单词的出现次数,而不仅仅是我输入的那个单词。所有计算都与 TF/IDF 有关。

#include <iostream>
#include <stdio.h>
#include <conio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <iomanip>  
#include <cmath>

using namespace std;

/**************************************************/

这里声明了 char word[50]:

 // Shared program state.
 // The per-document arrays have four slots so that 1-based document numbers
 // (1..3) are valid indices; slot 0 is simply unused.
 int ch,match[4],cnt,i,cntt[4],matchh[4],occurence,flg[4];
 float tf,idf;            // term-frequency / inverse-document-frequency results
 char word[50],line[50];  // search term typed by the user; token read buffer
 int nd=3,iterm;          // NOTE(review): nd looks like the document count; neither is used in the visible code
 int doc[20];             // NOTE(review): unused in the visible code

 /**************************************************/

 /**
  * Menu option 1: print the term frequency of the global `word` in
  * doc1.txt, doc2.txt and doc3.txt.
  *
  * For every document the function counts the whitespace-separated tokens
  * and how many of them contain `word` as a substring (strstr), then prints
  * the totals, the raw match counts and the ratio matches / tokens.
  *
  * Reads the global `word`; leaves the last computed ratio in the global `tf`.
  * A document that cannot be opened is reported and treated as empty.
  */
 void case1()
 {
     const int kDocCount = 3;
     static const char *const kDocNames[kDocCount] = {"doc1.txt", "doc2.txt", "doc3.txt"};

     // Local, zero-initialised counters: every menu selection starts from a
     // clean slate instead of accumulating on top of the previous run.
     int hits[kDocCount] = {0};    // tokens containing `word`, per document
     int total[kDocCount] = {0};   // all tokens, per document

     // Asker's note: this is where the word is used; changing
     // "char word[50]" to "string word[50]" produces errors here.
     for (int d = 0; d < kDocCount; ++d)
     {
         FILE *fp = fopen(kDocNames[d], "r");
         if (fp == NULL)
         {
             cout << "\n Cannot open " << kDocNames[d] << " - treated as empty";
             continue;
         }

         char token[50];
         // "%49s" keeps the read inside the buffer (longer words are split).
         // Looping on fscanf's return value, not feof(), stops as soon as no
         // token could be read, so the last word is not counted twice.
         while (fscanf(fp, "%49s", token) == 1)
         {
             if (strstr(token, word) != NULL)
                 ++hits[d];
             ++total[d];
         }

         fclose(fp);
     }

     cout<<"\n----------------Total # of Word-------------------\n";
     for (int d = 0; d < kDocCount; ++d)
         cout<<"\n Documant "<<(d + 1)<<" = "<<total[d];

     cout<<"\n\n------------------Term Counts--------------------\n";
     cout<<word<<"  ";
     cout<<"(";
     for (int d = 0; d < kDocCount; ++d)
         cout<<"Doc"<<(d + 1)<<","<<hits[d]<<" ; ";
     cout<<")";

     cout << "\n\n\n |  Words  |    D1     |    D2    |    D3    |  \n";
     cout<<"----------------------------------------------------------\n";
     cout<<"     "<<word;

     for (int d = 0; d < kDocCount; ++d)
     {
         // Term frequency; an empty document yields 0 rather than 0/0.
         tf = (total[d] > 0) ? static_cast<float>(hits[d]) / total[d] : 0.0f;
         cout<<"      "<<tf;
     }

     cout<<"\n\n\n\n\n\n\n";
 }

/**************************************************/

/**
 * Menu option 2: print the inverse document frequency of the global `word`
 * over doc1.txt, doc2.txt and doc3.txt.
 *
 * For every document the function counts the whitespace-separated tokens and
 * how many of them contain `word` as a substring (strstr). The document
 * frequency is the number of documents with at least one match, and
 * IDF = log10(3 / document frequency).
 *
 * Reads the global `word`; stores the document frequency in the global
 * `occurence` and the result in the global `idf`. A document that cannot be
 * opened is reported and treated as empty. When the word occurs in no
 * document the IDF is reported as undefined instead of dividing by zero.
 */
void case2()
{
    const int kDocCount = 3;
    static const char *const kDocNames[kDocCount] = {"doc1.txt", "doc2.txt", "doc3.txt"};

    // Local, zero-initialised counters so repeated menu selections do not
    // accumulate on top of earlier runs.
    int hits[kDocCount] = {0};    // tokens containing `word`, per document
    int total[kDocCount] = {0};   // all tokens, per document

    cout<<"\n Total Number Of Documants => 3";
    for (int d = 0; d < kDocCount; ++d)
    {
        FILE *fp = fopen(kDocNames[d], "r");
        if (fp == NULL)
        {
            cout << "\n Cannot open " << kDocNames[d] << " - treated as empty";
            continue;
        }

        char token[50];
        // "%49s" keeps the read inside the buffer (longer words are split).
        // Looping on fscanf's return value, not feof(), avoids processing
        // the last token a second time.
        while (fscanf(fp, "%49s", token) == 1)
        {
            if (strstr(token, word) != NULL)
                ++hits[d];
            ++total[d];
        }

        fclose(fp);
    }

    cout<<"\n----------------Total # of Word-------------------\n";
    for (int d = 0; d < kDocCount; ++d)
        cout<<"\n Documant "<<(d + 1)<<" are = "<<total[d];

    cout<<"\n\n------------------Term Counts--------------------\n";
    for (int d = 0; d < kDocCount; ++d)
        cout<<"\n Documant "<<(d + 1)<<" = "<<hits[d];

    // Document frequency: each document counts once, however many matches
    // it contains.
    int docsWithTerm = 0;
    for (int d = 0; d < kDocCount; ++d)
    {
        if (hits[d] > 0)
            ++docsWithTerm;
    }
    occurence = docsWithTerm;

    cout<<"\n\n-----------------Inverse Document Frequency-------------------\n";

    if (docsWithTerm > 0)
    {
        idf = log10(static_cast<float>(kDocCount) / docsWithTerm);
        cout<<"\n IDF = "<<idf;
    }
    else
    {
        // log10(3 / 0) has no finite value; report it explicitly.
        idf = 0.0f;
        cout<<"\n IDF = undefined (word not found in any document)";
    }

    cout<<"\n";
}
/**************************************************/



/**
 * Reads the search term into the global `word`.
 * setw() limits the extraction to the size of the buffer (including the
 * terminating '\0'), so an over-long word cannot overflow it.
 * Returns false when no word could be read (e.g. input was closed).
 */
static bool readSearchWord()
{
    cout<<"\n Enter The Word  => ";
    return static_cast<bool>(cin >> setw(static_cast<int>(sizeof(word))) >> word);
}

/**
 * Interactive menu loop: 1 = term frequency, 2 = inverse document frequency,
 * 3 = exit. Any other number simply shows the menu again.
 */
int main()
{
    for (;;)
    {
        cout<<"\n************** Menu ****************\n";
        cout<<"\n (1) Term Frequency";
        cout<<"\n (2) Inverse Document Frequency";
        cout<<"\n (3) Exit";
        cout<<"\n************************************\n";
        cout<<"\n Select from Menu => ";

        if (!(cin >> ch))
        {
            // End of input: nothing more will ever arrive, so stop instead
            // of redrawing the menu forever.
            if (cin.eof())
                return 0;

            // Non-numeric input: clear the error state and drop the rest of
            // the offending line before asking again.
            cin.clear();
            cin.ignore(1024, '\n');
            continue;
        }

        switch (ch)
        {
        // Asker's note: in this main section I do not want to type a word;
        // I want to list the count of every word.
        case 1:
            if (!readSearchWord())
                return 0;
            case1();
            break;

        case 2:
            if (!readSearchWord())
                return 0;
            case2();
            break;

        case 3:
            return 0;
        }
    }
}

1 个答案:

答案 0 :(得分:1)

请使用 std::string。应尽可能避免使用传统的 C 风格字符串,因为它们很容易被误用。C++ 字符串更安全,通常也更易于使用。

// Substring search: find() returns std::string::npos when there is no match.
std::string text{"some long line of text"};
std::string word{"line"};

const bool found = (text.find(word) != std::string::npos);
if (found)
{
    std::cout << "Found the word!\n";
}

如果您有多个单词:

// Check several candidate words against the same text.
std::vector<std::string> words{ "one", "word", "is", "badgers" };

for (const std::string& candidate : words)
{
    if (std::string::npos == text.find(candidate))
    {
        continue;  // not present in the text
    }
    std::cout << "Found \"" << candidate << "\"!\n";
}

如果您希望从文件中加载单词,完全可以使用(相对)安全的 C++ I/O 类和函数来完成。

// Load one word per line from the file.
// The loop tests the stream returned by getline() rather than .good():
// when the last line has no trailing newline, getline() reads it
// successfully but also sets eofbit, so .good() would be false and that
// final word would be dropped.
std::ifstream wordfile("where/your/file/is");
std::string word;

while (std::getline(wordfile, word))
{
    words.push_back(word);
}