运行优化代码时缓冲区溢出

时间:2014-08-12 17:59:44

标签: c compiler-optimization buffer-overflow

我有一个使用霍夫曼编码压缩文件的大型C程序。它用gcc编译,当我用优化(-O2或-O1)运行它时,我正在压缩的文件很大(16MB不会导致问题,66MB会这样),几秒钟后我得到一个缓冲区溢出以下消息:

*** buffer overflow detected ***: ./Huffman terminated
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x741df)[0x7f75eaf681df]
/lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x5c)[0x7f75eafffbac]
/lib/x86_64-linux-gnu/libc.so.6(+0x10aa70)[0x7f75eaffea70]
./Huffman[0x401da9]
./Huffman[0x401e96]
./Huffman[0x400d75]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5)[0x7f75eaf15ec5]
./Huffman[0x400da0]
======= Memory map: ========
00400000-00404000 r-xp 00000000 08:07 1050895                            /home/shoham/workspace/Huffman/build-Huffman-Desktop-Release/Huffman
00603000-00604000 r--p 00003000 08:07 1050895                            /home/shoham/workspace/Huffman/build-Huffman-Desktop-Release/Huffman
00604000-00605000 rw-p 00004000 08:07 1050895                            /home/shoham/workspace/Huffman/build-Huffman-Desktop-Release/Huffman
01c41000-7fc59000 rw-p 00000000 00:00 0                                  [heap]
7f756ae98000-7f756aeae000 r-xp 00000000 08:05 135031                     /lib/x86_64-linux-gnu/libgcc_s.so.1
7f756aeae000-7f756b0ad000 ---p 00016000 08:05 135031                     /lib/x86_64-linux-gnu/libgcc_s.so.1
7f756b0ad000-7f756b0ae000 rw-p 00015000 08:05 135031                     /lib/x86_64-linux-gnu/libgcc_s.so.1
7f756b0ae000-7f75eaef4000 rw-p 00000000 00:00 0 
7f75eaef4000-7f75eb0b0000 r-xp 00000000 08:05 144667                     /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb0b0000-7f75eb2af000 ---p 001bc000 08:05 144667                     /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb2af000-7f75eb2b3000 r--p 001bb000 08:05 144667                     /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb2b3000-7f75eb2b5000 rw-p 001bf000 08:05 144667                     /lib/x86_64-linux-gnu/libc-2.19.so
7f75eb2b5000-7f75eb2ba000 rw-p 00000000 00:00 0 
7f75eb2ba000-7f75eb3bf000 r-xp 00000000 08:05 144663                     /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb3bf000-7f75eb5be000 ---p 00105000 08:05 144663                     /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb5be000-7f75eb5bf000 r--p 00104000 08:05 144663                     /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb5bf000-7f75eb5c0000 rw-p 00105000 08:05 144663                     /lib/x86_64-linux-gnu/libm-2.19.so
7f75eb5c0000-7f75eb5e3000 r-xp 00000000 08:05 144655                     /lib/x86_64-linux-gnu/ld-2.19.so
7f75eb6b7000-7f75eb7bb000 rw-p 00000000 00:00 0 
7f75eb7dd000-7f75eb7e2000 rw-p 00000000 00:00 0 
7f75eb7e2000-7f75eb7e3000 r--p 00022000 08:05 144655                     /lib/x86_64-linux-gnu/ld-2.19.so
7f75eb7e3000-7f75eb7e4000 rw-p 00023000 08:05 144655                     /lib/x86_64-linux-gnu/ld-2.19.so
7f75eb7e4000-7f75eb7e5000 rw-p 00000000 00:00 0 
7fff6c1d4000-7fff6c1f5000 rw-p 00000000 00:00 0                          [stack]
7fff6c1fe000-7fff6c200000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
Aborted (core dumped)

我认为可能相关的代码在这里,我在那里阅读原始文件并编写新文件:

void writeHuffmanTree(HNode * huffmanTree, FILE * read, FILE * write) // reads the input file for which char to write, then searches for it in the huffman tree and writes it into the new file.
{
    Map map;
    unsigned int i, j, currentLocationRead, lengthRead, readBytesNumber, writeBufferIndex;
    unsigned char remainingCodeLength, writeBufferBitIndex, headerSize;
    char * readBuffer, *writeBuffer /* because File IO is slower than Memory IO*/, *header, currentChar;
    currentLocationRead = 0;
    lengthRead = getFileLength(read);
    readBuffer = (char *) calloc(1048576, sizeof(char));
    writeBuffer = (char *) calloc(1048576, sizeof(char));
    if (!readBuffer) quit(7, "writeHuffmanTree() - readBuffer");
    if (!writeBuffer) quit(7, "writeHuffmanTree() - writeBuffer");
    writeBufferIndex = 0; // current location in the array (can't simply ++ it because you may have codes that are 4 in length or something like that
    writeBufferBitIndex = 0; // current free bit index, from here on you can write data.
    map = *getCodeMap(huffmanTree, 0, 0);
    header = createHeader(huffmanTree, FALSE, writeBufferBitIndex, &headerSize);
    fwrite(header, sizeof(char), headerSize, write);
#ifdef debug
    printMap(&map);
#endif
    fseek(read, 0, SEEK_SET);
    while (currentLocationRead < lengthRead)
    {
        readBytesNumber = fread(readBuffer, sizeof(char), 1048576, read); // 1 MB at a time.
        for (i = 0; i < readBytesNumber; i++) // iterate through each character you read from the file
        {
            currentChar = readBuffer[i];
            for (j = 0; j < map.counter; j++) // find said char in the code map to get its code
            {
                if (currentChar == map.character[j]) // when it finds it
                {
                    if ((remainingCodeLength = addCode(writeBuffer, &writeBufferIndex, &writeBufferBitIndex, map.code[j], map.codeLength[j])))
                    { // didn't write the whole code, not enough place in buffer
                        fwrite(writeBuffer, sizeof(char), writeBufferIndex, write); // write the current buffer into the file
                        writeBuffer = (char *) memset(writeBuffer, 0, writeBufferIndex); // reset the buffer
                        writeBufferIndex = 0;
                        addCode(writeBuffer, &writeBufferIndex, &writeBufferBitIndex, map.code[j], remainingCodeLength); //TODO: validate
                    }
                    break;
                }
            }
        }
        currentLocationRead += readBytesNumber;
#ifdef debug
        printf("\n");
        for (i = 0; i < writeBufferIndex; i++)
            printf("%d, ", writeBuffer[i]);
#endif
    }
    fwrite(writeBuffer, sizeof(char), writeBufferIndex, write); // at the end add whatever remains in the buffer to the file
    if (writeBufferBitIndex != 0)
    {
        fseek(write, 1, SEEK_SET);
        fwrite(&writeBufferBitIndex, 1, sizeof(char), write); // modify the header
    }
    free(writeBuffer);
}

编辑:这是addCode():

unsigned char /* remaining code length not inputted */ addCode(char * buffer, unsigned int * currentIndex, unsigned char * currentBitIndex, unsigned char code, unsigned char codeLength)
{
    unsigned char bit;
    unsigned short * twobytes;
    unsigned char bound = codeLength;
    if (*currentBitIndex + codeLength > 8 && *currentIndex >= 1048575) // not enough place for the entire code in the entire buffer
    {
        bound = (*currentBitIndex + codeLength) - (8 - 1 /* (-1) to get index */);
    }
    twobytes = (unsigned short *) (buffer - 1 + *currentIndex); // use a short (2 bytes) to eliminate the problem where you need to change the byte you currently access.
    while (bound > 0)
    {
        bit = READBIT(code, --bound); // TODO: check if use bound or codeLength here
        codeLength--;
        *twobytes = SETBIT(*twobytes, 16 - ++(*currentBitIndex), bit); // (16 - ...) to start from the MSB.
        if (*currentBitIndex == 8) {
            (*currentIndex)++;
            (*currentBitIndex) %= 8;
        }
    }
    return codeLength - bound;
}

我很无能,因为它在调试模式下工作正常(没有优化)...... 我尝试在上面运行valgrind,但我的计算机卡住了,我甚至无法移动鼠标,所以我重新启动它..

任何帮助都将不胜感激。

2 个答案:

答案 0 :(得分:2)

您可以尝试以下几种方法:

  1. 使用关键变量的值添加printf语句,以确保它们符合您的想法。这也可以帮助您找出代码实际崩溃的位置。这是帮助您了解正在发生的事情的唯一最佳策略。
  2. 更好地理解指针 - 为什么使用memset的返回值?没有必要这样做。正如另一位评论者指出的那样,没有理由取消引用getCodeMap的返回值。 sizeof(char)定义为1。
  3. 打开警告进行编译。修复所有警告。
  4. 代码中还存在一些小问题:您不能免费使用readBuffer;你不应该对字符使用线性搜索(只需将它们放在256个元素的数组中并进行直接查找);你应该检查fwrite的返回值; fread / fwrite / etc已经进行了缓冲,所以你不需要大缓冲区(如果你关心效率,最简单的选择就是mmap);你也可以在更窄的范围内声明这些变量,这将更清晰。

答案 1 :(得分:0)

我发现了问题...... 这一行:

bound = (*currentBitIndex + codeLength) - (8 - 1 /* (-1) to get index */);

必须:

bound = (*currentBitIndex + codeLength) - 8 - 1 /* (-1) to get index */;

感谢想要帮助的傻瓜,你让我朝着正确的方向前进。