我正在编写一个c ++应用程序来实现这个:
给定一个用英文写的任意文本文档,编写一个将生成一个的程序 一致性,即用词频标记的所有单词出现的字母顺序列表。 奖励:用每个出现的句号标记每个单词。
但是每次它以核心转储结束时,我都看到了堆栈跟踪,这是调用free()的一些错误。我已多次读取我的代码,找不到错误。有人可以帮忙吗?
由于
输入:doc.txt
Given an arbitrary text document written in English, write a program that will generate a
concordance, i.e. an alphabetical list of all word occurrences, labeled with word frequencies.
Bonus: label each word with the sentence numbers in which each occurrence appeared.
DocParser.h:
#include <vector>
#include <cstring>
#include <string>
#include <iostream>
#include <fstream>
#include <map>
#include <ext/hash_map>
#include <ext/hash_set>
namespace std { using namespace __gnu_cxx; }
using namespace std;
struct ltstr
{
bool operator()(char* s1, char* s2) const
{
return strcmp(s1, s2) < 0;
}
};
class DocParser {
public:
DocParser (const char* file, const hash_set<char>& lineSeparators);
~DocParser ();
void Parse ();
void PrintResult ();
private:
struct Record {
int numOccurance;
vector<int> sentences;
Record (int num, int sentenceId) {
numOccurance = num;
sentences.push_back(sentenceId);
}
};
//typedef hash_map<char*, Record*, hash<char*>, ltstr> Map;
typedef map<char*, Record*, ltstr> Map;
ifstream inFile;
Map wordMap;
hash_set<char> lineSeparators;
void Increment(char* word, int sentenceId);
};
DocParser.cpp:
#include "DocParser.h"
DocParser::DocParser (const char* fileName, const hash_set<char>& lineSeparators)
: inFile (fileName), lineSeparators (lineSeparators) {
}
DocParser::~DocParser () {
if (inFile.is_open()) {
inFile.close();
}
Map::iterator it;
for ( it = wordMap.begin(); it != wordMap.end(); it++ ) {
delete (char*)it->first;
delete (Record*)it->second;
}
}
bool IsAB (char c) {
if ( c <= 'z' && c >= 'a' || c <= 'Z' && c>='A')
return true;
return false;
}
bool InWord (char* buf, int wordBegin, int currentIndex) {
bool ret;
if ( IsAB(buf[currentIndex]) )
ret = true;
else {
if ( wordBegin > 0 && buf[currentIndex] == '.' &&
(currentIndex - 1 == wordBegin || buf[currentIndex - 2] == '.') ) {
ret = true;
} else {
ret = false;
}
}
return ret;
}
void DocParser::Parse () {
char buf[1024];
int sentenceId = 1;
while (!inFile.eof()) {
inFile.getline(buf, sizeof(buf));
cout << buf << endl;
int wordBegin = -1;
int len = strlen(buf);
int index = 0;
while (index < len) {
if ( InWord(buf, wordBegin, index) ) {
if (wordBegin == -1) wordBegin = index;
} else {
char currentChar = buf[index];
if (wordBegin != -1) {
buf[index] = 0;
Increment(&buf[wordBegin], sentenceId);
wordBegin = -1;
}
if ( lineSeparators.find(currentChar) != lineSeparators.end() ) {
sentenceId++;
}
}
index++;
}
if (wordBegin != -1) {
Increment(&buf[wordBegin], sentenceId);
}
}
}
void DocParser::Increment (char* key, int sentenceId) {
Map::iterator it = wordMap.find(key);
if (it == wordMap.end()) {
char* buf = new char[ strlen(key) ];
strcpy(buf, key);
wordMap[buf] = new Record(1, sentenceId);
} else {
it->second->numOccurance++;
it->second->sentences.push_back(sentenceId);
}
}
void DocParser::PrintResult () {
Map::iterator it;
for ( it = wordMap.begin(); it != wordMap.end(); it++ ) {
cout << it->first << "\t\t" ;
cout << "{" << it->second->numOccurance << ":" ;
cout << it->second->sentences[0] ;
for (int i = 1; i < it->second->sentences.size(); i++) {
cout << "," ;
cout << it->second->sentences[i];
}
cout << "}" << "\n";
}
}
主:
#include "DocParser.h"
int main () {
char separators[] = {'!', '.', '?', };
hash_set<char> lineSeparators(separators, separators + 3);
DocParser p("doc.txt", lineSeparators);
p.Parse();
p.PrintResult();
}
错误消息:
*** glibc detected *** ./a.out: free(): invalid next size (fast): 0x08870868 ***
======= Backtrace: =========
/lib/libc.so.6[0x3f3fb6]
/usr/lib/libstdc++.so.6(_ZdlPv+0x22)[0x79e3fc2]
./a.out[0x8048ec4]
./a.out[0x804c580]
/lib/libc.so.6(__libc_start_main+0xe6)[0x39be36]
./a.out[0x8048d51]
======= Memory map: ========
00364000-00381000 r-xp 00000000 fd:00 47857 /lib/ld-2.13.so
00381000-00382000 r--p 0001c000 fd:00 47857 /lib/ld-2.13.so
00382000-00383000 rw-p 0001d000 fd:00 47857 /lib/ld-2.13.so
00385000-00508000 r-xp 00000000 fd:00 47858 /lib/libc-2.13.so
00508000-00509000 ---p 00183000 fd:00 47858 /lib/libc-2.13.so
00509000-0050b000 r--p 00183000 fd:00 47858 /lib/libc-2.13.so
0050b000-0050c000 rw-p 00185000 fd:00 47858 /lib/libc-2.13.so
0050c000-0050f000 rw-p 00000000 00:00 0
00540000-00568000 r-xp 00000000 fd:00 72273 /lib/libm-2.13.so
00568000-00569000 r--p 00027000 fd:00 72273 /lib/libm-2.13.so
00569000-0056a000 rw-p 00028000 fd:00 72273 /lib/libm-2.13.so
0065f000-00660000 r-xp 00000000 00:00 0 [vdso]
006b4000-006d0000 r-xp 00000000 fd:00 72267 /lib/libgcc_s-4.5.1-20100924.so.1
006d0000-006d1000 rw-p 0001b000 fd:00 72267 /lib/libgcc_s-4.5.1-20100924.so.1
07936000-07a19000 r-xp 00000000 fd:00 72317 /usr/lib/libstdc++.so.6.0.14
07a19000-07a1d000 r--p 000e2000 fd:00 72317 /usr/lib/libstdc++.so.6.0.14
07a1d000-07a1f000 rw-p 000e6000 fd:00 72317 /usr/lib/libstdc++.so.6.0.14
07a1f000-07a25000 rw-p 00000000 00:00 0
08048000-08051000 r-xp 00000000 fd:00 411297 /home/leon/Projects/sem/bridge_water/a.out
08051000-08052000 rw-p 00008000 fd:00 411297 /home/leon/Projects/sem/bridge_water/a.out
0886e000-0888f000 rw-p 00000000 00:00 0 [heap]
b78a5000-b78a8000 rw-p 00000000 00:00 0
b78b6000-b78b8000 rw-p 00000000 00:00 0
bfe35000-bfe56000 rw-p 00000000 00:00 0 [stack]
Aborted (core dumped)
答案 0 :(得分:7)
char* buf = new char[ strlen(key) ];
strcpy(buf, key);
你没有为空终结者留下足够的空间。
但请使用C ++字符串。