这是我的代码中运行REALLY SLOW的两个函数。 基本上我在文档名称中读取,打开文档,然后一次处理一个单词。我需要将文档拆分成句子,并给每个句子一个哈希表,表示单词出现在句子中的次数。我还需要跟踪所有新单词,以及总文档的哈希表。
当我现在在10个文档上运行我的代码时,总共有8000个单词,2100个uniq单词需要大约8000+秒才能运行...每个单词差不多1秒。
你能告诉我if(istream.good())
应该花多长时间?
或者,如果你能告诉我什么时候延迟我的代码。如果某个部分不清楚,请告诉我,我会帮忙。
P.S。您可以在代码中看到我有start = clock()
和end = clock()
注释,它会不断返回< 1毫秒。这就是思维障碍
void DocProcess::indexString(string sentenceString, hash * sent){
stringstream iss;
string word;
iss.clear();
iss << sentenceString;
while(iss.good())
{
iss >> word;
word = formatWord(word);
std::unordered_map<std::string,int>::const_iterator IsNewWord = words.find(word);
if(IsNewWord == words.end())
{
std::pair<std::string,int> newWordPair (word,0);
std::pair<std::string,int> newWordPairPlusOne (word,1);
words.insert(newWordPair);
sent->insert(newWordPairPlusOne);
}
else
{
std::pair<std::string,int> newWordPairPlusOne (word,1);
sent->insert(newWordPairPlusOne);
}
}
} void DocProcess :: indexFile(string iFileName){
hash newDocHash;
hash newSentHash;
scoreAndInfo sentenceScore;
scoreAndInfo dummy;
fstream iFile;
fstream dFile;
string word;
string newDoc;
string fullDoc;
int minSentenceLength = 5;
int docNumber = 1;
int runningLength = 0;
int ProcessedWords = 0;
stringstream iss;
iFile.open(iFileName.c_str());
if(iFile.is_open())
{
while(iFile.good())
{
iFile >> newDoc;
dFile.open(newDoc.c_str());
DocNames.push_back(newDoc);
if(dFile.is_open())
{
scoreAndInfo documentScore;
//iss << dFile.rdbuf();
while(dFile.good())
{
//start = clock();
dFile >> word;
++ProcessedWords;
std::unordered_map<std::string,int>::const_iterator IsStopWord = stopWords.find(word);
if(runningLength >= minSentenceLength && IsStopWord != stopWords.end() || word[word.length()-1] == '.')
{
/* word is in the stop list, process the string*/
documentScore.second.second.append(" "+word);
sentenceScore.second.second.append(" "+word);
indexString(sentenceScore.second.second, &sentenceScore.second.first);
sentenceScore.first=0.0;
SentList.push_back(sentenceScore);
sentenceScore.second.first.clear(); //Clear hash
sentenceScore.second.second.clear(); // clear string
//sentenceScore = dummy;
runningLength = 0;
}
else
{
++runningLength;
sentenceScore.second.second.append(" "+word);
documentScore.second.second.append(" "+word);
}
//end = clock();
system("cls");
cout << "Processing doc number: " << docNumber << endl
<< "New Word count: " << words.size() << endl
<< "Total words: " << ProcessedWords << endl;
//<< "Last process time****: " << double(diffclock(end,start)) << " ms"<< endl;
}
indexString(documentScore.second.second, &documentScore.second.first);
documentScore.first=0.0;
DocList.push_back(documentScore);
dFile.close();
//iss.clear();
//documentScore = dummy;
++docNumber;
//end = clock();
system("cls");
cout << "Processing doc number: " << docNumber << endl
<< "Word count: " << words.size();
//<< "Last process time: " << double(diffclock(end,start)) << " ms"<< endl;
}
}
iFile.close();
}
else{ cout << "Unable to open index file: "<<endl <<iFileName << endl;}
} `
答案 0 :(得分:2)
你能否在没有
的情况下尝试 system("cls");
在任何循环中?这肯定没有帮助,这是一个昂贵的电话。
答案 1 :(得分:0)
要快速清除屏幕,而不是system("cls");
,请尝试cout << '\f';
。