我写了一个程序,可以输出 txt 文件中最常用的单词。有没有人知道如何优化它,让它在处理更大的文件时运行得更快?
输出图像:http://i.stack.imgur.com/fVBh0.png
这是代码。
#include <math.h>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
/// One distinct word together with the number of times it occurs.
struct WordCount {
    std::string word;
    int count;
};

/// Returns true for the characters that separate words in the input text.
static bool is_delimiter(char ch)
{
    switch (ch) {
    case ' ':
    case '\t':
    case '.':
    case ',':
    case ';':
    case ':':
    case '{':
    case '}':
        return true;
    default:
        return false;
    }
}

/// Splits `line` on delimiters and appends each non-empty, lower-cased word to `words`.
static void append_words(const std::string& line, std::vector<std::string>& words)
{
    std::string current;
    for (char ch : line) {
        if (is_delimiter(ch)) {
            // Runs of delimiters must not produce empty "words".
            if (!current.empty()) {
                words.push_back(current);
                current.clear();
            }
        } else {
            // std::tolower is only defined for values representable as unsigned char.
            current += static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
        }
    }
    if (!current.empty()) {
        words.push_back(current);
    }
}

/// Groups equal words and returns them ordered by descending count;
/// words with the same count are ordered by descending spelling.
static std::vector<WordCount> count_words(std::vector<std::string> words)
{
    // Sorting first makes equal words adjacent, so one linear pass can count them.
    std::sort(words.begin(), words.end());

    std::vector<WordCount> counts;
    for (const std::string& word : words) {
        if (!counts.empty() && counts.back().word == word) {
            ++counts.back().count;
        } else {
            counts.push_back(WordCount{word, 1});
        }
    }

    std::sort(counts.begin(), counts.end(), [](const WordCount& lhs, const WordCount& rhs) {
        if (lhs.count != rhs.count) {
            return lhs.count > rhs.count;
        }
        return lhs.word > rhs.word;
    });
    return counts;
}

/// Reads file.txt, counts how often each word occurs (case-insensitively) and
/// prints a table of the 20 most common words followed by the number of
/// distinct words.
///
/// Returns EXIT_FAILURE if file.txt cannot be opened, 0 otherwise.
int main()
{
    std::ifstream in("file.txt");
    if (!in) {
        std::cerr << "Could not open file.txt.";
        return EXIT_FAILURE;
    }

    // Tokenize line by line; a line break always ends the current word.
    std::vector<std::string> words;
    std::string line;
    while (std::getline(in, line)) {
        append_words(line, words);
    }

    const std::size_t total_words = words.size();
    const std::vector<WordCount> counts = count_words(words);

    // Column separator: byte 179 is the vertical bar in the CP437 console code page.
    const char box = static_cast<char>(179);

    std::cout << "------------------------------------" << '\n';
    std::cout << "|--->TABLE OF MOST COMMON WORDS<---|" << '\n';
    std::cout << "------------------------------------" << '\n';

    const std::size_t shown = std::min<std::size_t>(counts.size(), 20);
    for (std::size_t rank = 0; rank < shown; ++rank) {
        const WordCount& entry = counts[rank];
        // Share of all words in the file, rounded to two decimals.
        const float percent =
            roundf(static_cast<float>(entry.count) * 100 / total_words * 100) / 100;
        std::cout << box << rank + 1 << "." << '\t' << box << entry.word << '\t' << box << "*"
                  << entry.count << '\t' << box << percent << "%" << '\n';
    }

    std::cout << "------------------------------------" << '\n';
    std::cout << "|----->Dif. strings: " << '\t' << counts.size() << "<-------|" << '\n';
    std::cout << "------------------------------------" << std::endl;
    return 0;
}
答案 0 :(得分:0)
由于这个版本不会尝试一次性把整个文件读入内存,因此文件大小的唯一上限就是您愿意等待输出的时间。
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <map>
#include <algorithm>
/// Normalizes one character for word splitting.
///
/// @param arg  the character to normalize
/// @return a space if `arg` is one of the delimiters . , ; : { } or a tab;
///         otherwise the lower-case form of `arg`
char convert(char arg)
{
    if (arg == '.' || arg == '\t' || arg == ',' || arg == ';'
        || arg == ':' || arg == '}' || arg == '{')
    {
        return ' '; // convert delimiters to spaces
    }
    // std::tolower is only defined for values representable as unsigned char
    // (or EOF); plain char may be negative, so go through unsigned char first.
    return static_cast<char>(std::tolower(static_cast<unsigned char>(arg)));
}
/// Counts how often each word occurs in the input file.
///
/// Words are whitespace-separated tokens that are lower-cased and further
/// split on the delimiter set handled by convert().
///
/// Returns 1 if the input file cannot be opened, 0 otherwise.
int main()
{
    std::ifstream in("c:\\etc\\foo.txt");
    if (!in)
    {
        // A failed open sets failbit but never eofbit, so a loop that only
        // tests eof() would spin forever; bail out explicitly instead.
        std::cerr << "Could not open c:\\etc\\foo.txt.\n";
        return 1;
    }

    // This can be replaced with std::unordered_map if you are willing to sacrifice
    // lexical sorting of the output for speed
    std::map<std::string, int> counts;

    // Testing the extraction itself stops on end-of-file and on read errors,
    // and never yields an empty token.
    std::string str;
    while (in >> str)
    {
        // Convert to lower case, and convert our delimiter set to spaces
        std::transform(str.begin(), str.end(), str.begin(), convert);

        // The token may now contain spaces, so split it again into words.
        std::stringstream in1(str);
        std::string word;
        while (in1 >> word)
        {
            // operator[] value-initializes a missing entry to 0 before the increment.
            ++counts[word];
        }
    }
    // Output the results here
    return 0;
}