我有一个使用霍夫曼编码压缩文件的大型C程序。它用gcc编译,当我用优化(-O2或-O1)运行它时,我正在压缩的文件很大(16MB不会导致问题,66MB会这样),几秒钟后我得到一个缓冲区溢出以下消息:
*** buffer overflow detected ***: ./Huffman terminated
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x741df)[0x7f75eaf681df]
/lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x5c)[0x7f75eafffbac]
/lib/x86_64-linux-gnu/libc.so.6(+0x10aa70)[0x7f75eaffea70]
./Huffman[0x401da9]
./Huffman[0x401e96]
./Huffman[0x400d75]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5)[0x7f75eaf15ec5]
./Huffman[0x400da0]
======= Memory map: ========
00400000-00404000 r-xp 00000000 08:07 1050895 /home/shoham/workspace/Huffman/build-Huffman-Desktop-Release/Huffman
00603000-00604000 r--p 00003000 08:07 1050895 /home/shoham/workspace/Huffman/build-Huffman-Desktop-Release/Huffman
00604000-00605000 rw-p 00004000 08:07 1050895 /home/shoham/workspace/Huffman/build-Huffman-Desktop-Release/Huffman
01c41000-7fc59000 rw-p 00000000 00:00 0 [heap]
7f756ae98000-7f756aeae000 r-xp 00000000 08:05 135031 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f756aeae000-7f756b0ad000 ---p 00016000 08:05 135031 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f756b0ad000-7f756b0ae000 rw-p 00015000 08:05 135031 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f756b0ae000-7f75eaef4000 rw-p 00000000 00:00 0
7f75eaef4000-7f75eb0b0000 r-xp 00000000 08:05 144667 /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb0b0000-7f75eb2af000 ---p 001bc000 08:05 144667 /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb2af000-7f75eb2b3000 r--p 001bb000 08:05 144667 /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb2b3000-7f75eb2b5000 rw-p 001bf000 08:05 144667 /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb2b5000-7f75eb2ba000 rw-p 00000000 00:00 0
7f75eb2ba000-7f75eb3bf000 r-xp 00000000 08:05 144663 /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb3bf000-7f75eb5be000 ---p 00105000 08:05 144663 /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb5be000-7f75eb5bf000 r--p 00104000 08:05 144663 /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb5bf000-7f75eb5c0000 rw-p 00105000 08:05 144663 /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb5c0000-7f75eb5e3000 r-xp 00000000 08:05 144655 /lib/x86_64-linux-gnu/ld-2.19.so
7f75eb6b7000-7f75eb7bb000 rw-p 00000000 00:00 0
7f75eb7dd000-7f75eb7e2000 rw-p 00000000 00:00 0
7f75eb7e2000-7f75eb7e3000 r--p 00022000 08:05 144655 /lib/x86_64-linux-gnu/ld-2.19.so
7f75eb7e3000-7f75eb7e4000 rw-p 00023000 08:05 144655 /lib/x86_64-linux-gnu/ld-2.19.so
7f75eb7e4000-7f75eb7e5000 rw-p 00000000 00:00 0
7fff6c1d4000-7fff6c1f5000 rw-p 00000000 00:00 0 [stack]
7fff6c1fe000-7fff6c200000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
Aborted (core dumped)
我认为可能相关的代码在这里,我在那里阅读原始文件并编写新文件:
void writeHuffmanTree(HNode * huffmanTree, FILE * read, FILE * write) // reads the input file for which char to write, then searches for it in the huffman tree and writes it into the new file.
{
Map map;
unsigned int i, j, currentLocationRead, lengthRead, readBytesNumber, writeBufferIndex;
unsigned char remainingCodeLength, writeBufferBitIndex, headerSize;
char * readBuffer, *writeBuffer /* because File IO is slower than Memory IO*/, *header, currentChar;
currentLocationRead = 0;
lengthRead = getFileLength(read);
readBuffer = (char *) calloc(1048576, sizeof(char));
writeBuffer = (char *) calloc(1048576, sizeof(char));
if (!readBuffer) quit(7, "writeHuffmanTree() - readBuffer");
if (!writeBuffer) quit(7, "writeHuffmanTree() - writeBuffer");
writeBufferIndex = 0; // current location in the array (can't simply ++ it because you may have codes that are 4 in length or something like that
writeBufferBitIndex = 0; // current free bit index, from here on you can write data.
map = *getCodeMap(huffmanTree, 0, 0);
header = createHeader(huffmanTree, FALSE, writeBufferBitIndex, &headerSize);
fwrite(header, sizeof(char), headerSize, write);
#ifdef debug
printMap(&map);
#endif
fseek(read, 0, SEEK_SET);
while (currentLocationRead < lengthRead)
{
readBytesNumber = fread(readBuffer, sizeof(char), 1048576, read); // 1 MB at a time.
for (i = 0; i < readBytesNumber; i++) // iterate through each character you read from the file
{
currentChar = readBuffer[i];
for (j = 0; j < map.counter; j++) // find said char in the code map to get its code
{
if (currentChar == map.character[j]) // when it finds it
{
if ((remainingCodeLength = addCode(writeBuffer, &writeBufferIndex, &writeBufferBitIndex, map.code[j], map.codeLength[j])))
{ // didn't write the whole code, not enough place in buffer
fwrite(writeBuffer, sizeof(char), writeBufferIndex, write); // write the current buffer into the file
writeBuffer = (char *) memset(writeBuffer, 0, writeBufferIndex); // reset the buffer
writeBufferIndex = 0;
addCode(writeBuffer, &writeBufferIndex, &writeBufferBitIndex, map.code[j], remainingCodeLength); //TODO: validate
}
break;
}
}
}
currentLocationRead += readBytesNumber;
#ifdef debug
printf("\n");
for (i = 0; i < writeBufferIndex; i++)
printf("%d, ", writeBuffer[i]);
#endif
}
fwrite(writeBuffer, sizeof(char), writeBufferIndex, write); // at the end add whatever remains in the buffer to the file
if (writeBufferBitIndex != 0)
{
fseek(write, 1, SEEK_SET);
fwrite(&writeBufferBitIndex, 1, sizeof(char), write); // modify the header
}
free(writeBuffer);
}
编辑:这是addCode():
unsigned char /* remaining code length not inputted */ addCode(char * buffer, unsigned int * currentIndex, unsigned char * currentBitIndex, unsigned char code, unsigned char codeLength)
{
unsigned char bit;
unsigned short * twobytes;
unsigned char bound = codeLength;
if (*currentBitIndex + codeLength > 8 && *currentIndex >= 1048575) // not enough place for the entire code in the entire buffer
{
bound = (*currentBitIndex + codeLength) - (8 - 1 /* (-1) to get index */);
}
twobytes = (unsigned short *) (buffer - 1 + *currentIndex); // use a short (2 bytes) to eliminate the problem where you need to change the byte you currently access.
while (bound > 0)
{
bit = READBIT(code, --bound); // TODO: check if use bound or codeLength here
codeLength--;
*twobytes = SETBIT(*twobytes, 16 - ++(*currentBitIndex), bit); // (16 - ...) to start from the MSB.
if (*currentBitIndex == 8) {
(*currentIndex)++;
(*currentBitIndex) %= 8;
}
}
return codeLength - bound;
}
我很无能,因为它在调试模式下工作正常(没有优化)...... 我尝试在上面运行valgrind,但我的计算机卡住了,我甚至无法移动鼠标,所以我重新启动它..
任何帮助都将不胜感激。
答案 0 :(得分:2)
您可以尝试以下几种方法:
代码中还存在一些小问题:您不能免费使用readBuffer;你不应该对字符使用线性搜索(只需将它们放在256个元素的数组中并进行直接查找);你应该检查fwrite的返回值; fread / fwrite / etc已经进行了缓冲,所以你不需要大缓冲区(如果你关心效率,最简单的选择就是mmap);你也可以在更窄的范围内声明这些变量,这将更清晰。
答案 1 :(得分:0)
我发现了问题...... 这一行:
bound = (*currentBitIndex + codeLength) - (8 - 1 /* (-1) to get index */);
必须:
bound = (*currentBitIndex + codeLength) - 8 - 1 /* (-1) to get index */;
感谢想要帮助的傻瓜,你让我朝着正确的方向前进。