zlib和gzip生成不同的数据

时间:2014-09-10 00:34:47

标签: c++ c gzip zlib deflate

我有一大堆数据应该是zlib压缩数据(我不是100%肯定)。

我首先尝试在数据前面加上前缀“1F 8B 08 00 00 00 00 00”,然后用gzip解压缩它,就像这个问题(https://unix.stackexchange.com/questions/22834/how-to-uncompress-zlib-data-in-unix)的已采纳答案中那样。这样做成功了,而且很可能是正确的方法,因为输出中包含许多人类可读的字符串。

然后我尝试在C++程序中使用zlib实现同样的解压缩,但zlib似乎生成了不同的输出。我遗漏了什么吗?zlib和gzip应该基本相同(除了头部和尾部之外),不是吗?还是我的代码中有一个简单的错误?(为简单起见,数据块已被截短)

unsigned char* decompressed;
unsigned char* dataChunk = /*...*/;
printHex(dataChunk, 160);
int error = inflateZlib(dataChunk, 160, decompressed, 1000);
printHex(decompressed, 160);
//zerr(error);

printHex(unsigned char* data, size_t n)
{
    for(size_t i = 0; i < n; i++)
    {
        std::cout << std::hex << (uint16_t)data[i] << " ";
    }
    std::cout << std::dec << "\n-\n";
}

int inflateZlib(unsigned char* data, size_t length, unsigned char* decompressed, size_t maxDecompressed)
{
    decompressed = new unsigned char[maxDecompressed];

    z_stream infstream;
    infstream.zalloc = Z_NULL;
    infstream.zfree = Z_NULL;
    infstream.opaque = Z_NULL;
    infstream.avail_in = (uInt)(length); // size of input
    infstream.next_in = (Bytef *)data; // input char array
    infstream.avail_out = (uInt)maxDecompressed; // size of output
    infstream.next_out = (Bytef *)decompressed; // output char array
    // the actual DE-compression work.
    int ret = inflateInit(&infstream);
    zerr(ret);
    ret = inflate(&infstream, Z_NO_FLUSH);
    zerr(ret);
    inflateEnd(&infstream);

    return ret;
}

这会产生以下输出:

78 9c bd 58 4b 88 23 45 18 ee 3c 67 e3 24 93 cc ae 8a f8 42 10 c4 cb 1a 33 a3 7b f0 60 e6 e0 e6 e0 49 90 bd 29 4d 4d 77 25 dd 99 ee ea de aa ee 4c 32 82 2c e8 c1 93 ac 47 c5 45 f 82 8 5e 16 f ba 78 18 45 d0 83 7 95 15 5c d0 c3 aa b0 b2 ee 65 5c f0 e4 c5 bf aa 1f a9 ea 74 cf 64 7 31 c3 24 9d fa fe bf ea ab ff 59 15 ab 62 6a b5 5d 9b 8c 18 2a 5b 15 47 d3 b4 92 55 35 b5 ba b7 3d c6 46 b0 a3 35 3 1c 50 64 61 93 7a a4 67 d5 0 e1 c2 d8 e4 92 75 fe 56 b3 ca a6 76 c2 f0 1c 8f 
-
0 0 6 c0 83 50 0 0 16 b0 78 9c bd 58 4b 88 23 45 18 ee 3c 67 e3 24 93 cc ae 8a f8 42 10 c4 cb 1a 33 a3 7b f0 60 e6 e0 e6 e0 49 90 bd 29 4d 4d 77 25 dd 99 ee ea de aa ee 4c 32 82 2c e8 c1 93 ac 47 c5 45 f 82 8 5e 16 f ba 78 18 45 d0 83 7 95 15 5c d0 c3 aa b0 b2 ee 65 5c f0 e4 c5 bf aa 1f a9 ea 74 cf 64 7 31 c3 24 9d fa fe bf ea ab ff 59 15 ab 62 6a b5 5d 9b 8c 18 2a 5b 15 47 d3 b4 92 55 35 b5 ba b7 3d c6 46 b0 a3 35 3 1c 50 64 61 93 7a a4 67 d5 0 e1 c2 d8 e4 92 75 
-

这不是我想要的。 而gzip:

# Emit the bytes 1f 8b 08 00 00 00 00 00 followed by the data chunk, decompress them with gzip, and show the result as a hex dump.
printf "\x1f\x8b\x08\x00\x00\x00\x00\x00\x78\x9c\xbd\x58\x4b\x88\x23\x45\x18\xee\x3c\x67\xe3\x24\x93\xcc\xae\x8a\xf8\x42\x10\xc4\xcb\x1a\x33\xa3\x7b\xf0\x60\xe6\xe0\xe6\xe0\x49\x90\xbd\x29\x4d\x4d\x77\x25\xdd\x99\xee\xea\xde\xaa\xee\x4c\x32\x82\x2c\xe8\xc1\x93\xac\x47\xc5\x45\xf\x82\x8\x5e\x16\xf\xba\x78\x18\x45\xd0\x83\x7\x95\x15\x5c\xd0\xc3\xaa\xb0\xb2\xee\x65\x5c\xf0\xe4\xc5\xbf\xaa\x1f\xa9\xea\x74\xcf\x64\x7\x31\xc3\x24\x9d\xfa\xfe\xbf\xea\xab\xff\x59\x15\xab\x62\x6a\xb5\x5d\x9b\x8c\x18\x2a\x5b\x15\x47\xd3\xb4\x92\x55\x35\xb5\xba\xb7\x3d\xc6\x46\xb0\xa3\x35\x3\x1c\x50\x64\x61\x93\x7a\xa4\x67\xd5\x0\xe1\xc2\xd8\xe4\x92\x75\xfe\x56\xb3\xca\xa6\x76\xc2\xf0\x1c\x8f" | gzip -dc | hexdump -C

产生

gzip: stdin: unexpected end of file
00000000  68 03 64 00 05 77 69 6e  67 73 61 02 68 03 6c 00  |h.d..wingsa.h.l.|
00000010  00 00 01 68 04 64 00 06  6f 62 6a 65 63 74 6b 00  |...h.d..objectk.|
00000020  0c 74 65 74 72 61 68 65  64 72 6f 6e 31 68 05 64  |.tetrahedron1h.d|
00000030  00 06 77 69 6e 67 65 64  6c 00 00 00 06 6c 00 00  |..wingedl....l..|
00000040  00 05 68 02 64 00 08 63  6f 6c 6f                 |..h.d..colo|
0000004b

这就是我想要的。

2 个答案:

答案 0 :(得分:1)

我能够使用zlib 1.2.8和inflateInit2函数解码您提供的数据,其中32为windowBits。我根据zlib文档中的这些信息使用了32:

  

windowBits也可以为零,以请求inflate使用压缩流的zlib头中的窗口大小。

  

将windowBits加上32,即可启用带自动头部检测的zlib和gzip解码。

这是完整的代码。我删除了错误检查,因为我没有zerr函数。您似乎没有使用Visual C++,因此您需要删除#pragma以避免出现警告。

#include <iostream>
#include <iomanip>
#include <cstdint>
#include <cctype>
#include "zlib.h"

#pragma comment(lib, "zdll.lib")

// Number of bytes shown on each dump line.
const size_t block_size = 16;

// Writes one hex-dump line to std::cout:
//   <offset as 8 hex digits>  <16 two-digit hex bytes in two groups of 8> |<16 characters>|
//
// data   - first byte of the line.
// offset - position of that byte in the whole buffer; always printed in hex.
// n      - number of valid bytes at data. Nothing is printed when n is 0.
//          Positions at or beyond n are shown as "00" in the hex columns and
//          '.' in the character column. At most block_size bytes are shown.
//
// In the character column only alphanumeric bytes are shown as themselves;
// everything else becomes '.'.
//
// The formatting state of std::cout (flags and fill character) is restored
// before returning, so the hex/zero-fill settings used here do not leak into
// later output and the offset does not depend on what was printed before.
void printLine(unsigned char* data, size_t offset, size_t n)
{
    if(!n)
    {
        return;
    }

    // std::hex, std::right and std::setfill are sticky: save the caller's state first.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const char saved_fill = std::cout.fill();

    std::cout << std::hex << std::right << std::setfill('0');
    std::cout << std::setw(8) << offset << " ";
    for(size_t x = 0; x < block_size; ++x)
    {
        // Extra space in front of each group of eight bytes.
        if(x % (block_size/2) == 0) std::cout << " ";
        const unsigned int value = x < n ? data[x] : 0;
        std::cout << std::setw(2) << value << " ";
    }
    std::cout << "|";
    for(size_t x = 0; x < block_size; ++x)
    {
        const bool shown = x < n && std::isalnum(data[x]);
        std::cout << static_cast<char>(shown ? data[x] : '.');
    }
    std::cout << "|\n";

    std::cout.flags(saved_flags);
    std::cout.fill(saved_fill);
}

void printHex(unsigned char* data, size_t n)
{
    const size_t blocks = n / block_size;
    const size_t remainder = n % block_size;
    for(size_t i = 0; i < blocks; i++)
    {
        size_t offset = i * block_size;
        printLine(&data[offset], offset, block_size);
    }
    size_t offset = blocks * block_size;
    printLine(&data[offset], offset, remainder);
    std::cout << "\n";
}

int inflateZlib(unsigned char* data, uint32_t length, unsigned char* decompressed, uint32_t maxDecompressed)
{
    z_stream infstream;
    infstream.zalloc = Z_NULL;
    infstream.zfree = Z_NULL;
    infstream.opaque = Z_NULL;
    infstream.avail_in = length;
    infstream.next_in = data;
    infstream.avail_out = maxDecompressed;
    infstream.next_out = decompressed;
    inflateInit2(&infstream, 32);
    inflate(&infstream, Z_FINISH);
    inflateEnd(&infstream);
    return infstream.total_out;
}

// Dumps the sample input, inflates it, and dumps the decoded bytes.
int main()
{
    // Sample input: the bytes 1f 8b 08 00 00 00 00 00 followed by the data chunk.
    unsigned char dataChunk[] =
        "\x1f\x8b\x08\x00\x00\x00\x00\x00\x78\x9c\xbd\x58\x4b\x88\x23\x45"
        "\x18\xee\x3c\x67\xe3\x24\x93\xcc\xae\x8a\xf8\x42\x10\xc4\xcb\x1a"
        "\x33\xa3\x7b\xf0\x60\xe6\xe0\xe6\xe0\x49\x90\xbd\x29\x4d\x4d\x77"
        "\x25\xdd\x99\xee\xea\xde\xaa\xee\x4c\x32\x82\x2c\xe8\xc1\x93\xac"
        "\x47\xc5\x45\xf\x82\x8\x5e\x16\xf\xba\x78\x18\x45\xd0\x83\x7\x95"
        "\x15\x5c\xd0\xc3\xaa\xb0\xb2\xee\x65\x5c\xf0\xe4\xc5\xbf\xaa\x1f"
        "\xa9\xea\x74\xcf\x64\x07\x31\xc3\x24\x9d\xfa\xfe\xbf\xea\xab\xff"
        "\x59\x15\xab\x62\x6a\xb5\x5d\x9b\x8c\x18\x2a\x5b\x15\x47\xd3\xb4"
        "\x92\x55\x35\xb5\xba\xb7\x3d\xc6\x46\xb0\xa3\x35\x03\x1c\x50\x64"
        "\x61\x93\x7a\xa4\x67\xd5\x00\xe1\xc2\xd8\xe4\x92\x75\xfe\x56\xb3"
        "\xca\xa6\x76\xc2\xf0\x1c\x8f";
    // The array is initialised from a string literal, so sizeof also counts
    // the terminating NUL; leave it out so only real data is dumped and inflated.
    const uint32_t dataLength = static_cast<uint32_t>(sizeof(dataChunk) - 1);
    unsigned char decompressed[1000] = {};

    printHex(dataChunk, dataLength);
    uint32_t len = inflateZlib(dataChunk, dataLength, decompressed, sizeof(decompressed));
    printHex(decompressed, len);
    return 0;
}

答案 1 :(得分:0)

我认为您可能需要以不同的方式定义decompressed:

// Declares decompressed as a 1000-byte array rather than a pointer.
unsigned char decompressed[1000];