如何将这段 C++ 代码中的 `char word[50]` 改为 `string word[50]`?我想统计文本中每一个单词的出现次数并据此进行计算,而不仅仅是针对我输入的那一个单词。所有计算都与 TF/IDF 有关。
#include <iostream>
#include <stdio.h>
#include <conio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <iomanip>
#include <cmath>
using namespace std;
/**************************************************/
下面的全局变量中声明了 `char word[50]`:
// Shared program state. Objects at namespace scope are zero-initialized
// unless an initializer is given.
//
// The per-document arrays (match, cntt, matchh, flg) are indexed 1..3 inside
// the loops of case1()/case2(), so they need four slots; with only three
// elements, index 3 was an out-of-bounds write.
int ch;         // menu selection read in main()
int match[4];   // per-document count of tokens containing `word`
int cnt;        // running token counter
int i;          // loop index shared by case1()/case2()
int cntt[4];    // per-document total token count
int matchh[4];  // per-document match count used by case2()
int occurence;  // accumulator used as the IDF denominator in case2()
int flg[4];     // per-document flag set when `word` was seen
float tf;       // last computed term-frequency value
float idf;      // last computed inverse-document-frequency value
char word[50];  // search term typed by the user
char line[50];  // buffer for the token most recently read from a document
int nd = 3;     // presumably the number of documents; not referenced in the code shown here
int iterm;      // not referenced in the code shown here
int doc[20];    // not referenced in the code shown here
/**************************************************/
void case1()
{
FILE *fill[3];
fill[1]=fopen("doc1.txt","r");
fill[2]=fopen("doc2.txt","r");
fill[3]=fopen("doc3.txt","r");
match[i]=0;
cnt=0;
我需要在这里更改单词,当我将字[50]改为字符串[50]时,会给出一些错误
for(i=1;i<4;i++)
{
while(!feof(fill[i]))
{
fscanf(fill[i],"%s",line);
// if(strcmp(line,word)==0)
if(strstr(line,word)!=0)
match[i]++;
cntt[i]++;
}
fclose(fill[i]);
}
cout<<"\n----------------Total # of Word-------------------\n";
for(i=1;i<4;i++)
{
cout<<"\n Documant "<<i<<" = "<<cntt[i];
}
cout<<"\n\n------------------Term Counts--------------------\n";
cout<<word<<" ";
cout<<"(";
for(i=1;i<4;i++)
{
cout<<"Doc"<<i<<","<<match[i]<<" ; ";
}
cout<<")";
cout << "\n\n\n | Words | D1 | D2 | D3 | \n";
cout<<"----------------------------------------------------------\n";
cout<<" "<<word;
for(i=1;i<4;i++)
{
tf=(float)match[i]/cntt[i]; //Term Frequency
cout<<" "<<tf;
}
cout<<"\n\n\n\n\n\n\n";
}
/**************************************************/
void case2()
{
FILE *fill[3];
fill[1]=fopen("doc1.txt","r");
fill[2]=fopen("doc2.txt","r");
fill[3]=fopen("doc3.txt","r");
match[i]=0;
cnt=0;
cout<<"\n Total Number Of Documants => 3";
for(i=1;i<4;i++)
{
while(!feof(fill[i]))
{
fscanf(fill[i],"%s",line);
// if(strcmp(line,word)==0)
if(strstr(line,word)!=0)
{
match[i]++;
matchh[i]++;
flg[i]=1;
}
cnt++;
cntt[i]++;
}
fclose(fill[i]);
}
cout<<"\n----------------Total # of Word-------------------\n";
for(i=1;i<4;i++)
{
cout<<"\n Documant "<<i<<" are = "<<cntt[i];
}
cout<<"\n\n------------------Term Counts--------------------\n";
for(i=1;i<4;i++)
{
cout<<"\n Documant "<<i<<" = "<<matchh[i];
}
for(i=1;i<4;i++)
{
if(matchh[i]>0)
{
matchh[i]=1;
flg[i]=flg[i]+matchh[i];
occurence=occurence+flg[i];
}
else
matchh[i]=0;
}
tf=(float)3/occurence;
idf=log10(tf);
cout<<"\n\n-----------------Inverse Document Frequency-------------------\n";
cout<<"\n IDF = "<<idf;
for(i=1;i<4;i++)
matchh[i]=cntt[i]=flg[i]=0;
cout<<"\n";
}
/**************************************************/
int main()
{
do
{
cout<<"\n************** Menu ****************\n";
cout<<"\n (1) Term Frequency";
cout<<"\n (2) Inverse Document Frequency";
cout<<"\n (3) Exit";
cout<<"\n************************************\n";
cout<<"\n Select from Menu => ";cin>>ch;
switch(ch)
{
这里主要部分我不想输入单词,我想列出每个单词的数量。
case 1:
cout<<"\n Enter The Word => ";
cin>>word;
case1();
break;
case 2: cout<<"\n Enter The Word => ";
cin>>word;
case2();
break;
case 3: exit(0);
}
}
while(ch!=3);
getch();
}
答案 0 :(得分:1)
请使用 `std::string`。应尽可能避免使用旧式的 C 风格字符串,因为它们很容易被误用。C++ 字符串更安全,通常也更易于使用。
// Substring search with std::string: find() returns std::string::npos when
// `word` does not occur anywhere in `text`.
std::string text{"some long line of text"};
std::string word{"line"};
const bool wordFound = (std::string::npos != text.find(word));
if (wordFound)
{
    std::cout << "Found the word!\n";
}
如果您有多个单词:
// Check several candidate words against `text` and report each one that
// occurs in it as a substring.
std::vector<std::string> words{"one", "word", "is", "badgers"};
for (const std::string& candidate : words)
{
    const bool present = (std::string::npos != text.find(candidate));
    if (present)
    {
        std::cout << "Found \"" << candidate << "\"!\n";
    }
}
如果您希望从文件中加载单词,完全可以使用(相对)安全的 C++ I/O 类和函数来完成。
// Load one entry per line from the file and append each to `words`.
std::ifstream wordfile("where/your/file/is");
std::string word;
// Test the stream itself rather than .good(): when the last line has no
// trailing newline, getline reads it successfully but also sets eofbit, so
// .good() would be false and that final line would be silently dropped.
while (std::getline(wordfile, word))
{
    words.push_back(word);
}