Question

我写了一些这样的代码：

/// Decompresses an in-memory gzip buffer into a new vector.
///
/// NOTE: no upper bound is placed on the output. Every byte the decompressor
/// produces is appended to `decompressed`, so the result grows to whatever
/// size the compressed stream expands to.
///
/// @param compressed  Gzip-compressed input bytes.
/// @return The decompressed bytes.
std::vector<char> unzip(std::vector<char> const& compressed)
{
   std::vector<char> decompressed;

   boost::iostreams::filtering_ostream os;

   os.push(boost::iostreams::gzip_decompressor());
   os.push(boost::iostreams::back_inserter(decompressed));

   // data() is well-defined for an empty vector, whereas &compressed[0] is not.
   boost::iostreams::write(os, compressed.data(), compressed.size());
   // Tear the chain down before returning so that everything still buffered
   // in the filters ends up in `decompressed`.
   os.reset();
   return decompressed;
}

如果 compressed 是一个 zip 炸弹（压缩炸弹），会发生什么？我认为内存会被耗尽，进程会崩溃。

那么如何避免这种情况？如何在解压缩之前检查解压后（原始）数据的大小？

Answer 1

做法和往常一样：在解压缩的过程中留意输出的大小。

您可以使用容量固定/有限的缓冲区（例如使用 boost::iostreams::array_sink），也可以给复制操作加上一层保护，对最大大小进行检查。

此外，在您的示例中，输入是内存缓冲区，因此使用设备（device，例如 array_source）作为输入比使用流更合理。下面是一个简单的实现：

/// Decompresses an in-memory gzip buffer, stopping once `limit` bytes have
/// been produced.
///
/// @param limit       Maximum number of decompressed bytes to return.
/// @param compressed  Gzip-compressed input bytes.
/// @return The decompressed data, silently truncated to at most `limit` bytes.
std::vector<char> unzip(size_t limit, std::vector<char> const& compressed) {
   std::vector<char> decompressed;

   boost::iostreams::filtering_istream is;

   is.push(boost::iostreams::gzip_decompressor());
   is.push(boost::iostreams::array_source(compressed.data(), compressed.size()));

   while (is && (decompressed.size() < limit)) {
       char buf[512];
       // Request no more than the remaining budget; otherwise the final chunk
       // could push the result past `limit` whenever `limit` is not a
       // multiple of the buffer size.
       size_t const room = limit - decompressed.size();
       size_t const want = room < sizeof(buf) ? room : sizeof(buf);
       is.read(buf, static_cast<std::streamsize>(want));
       // gcount() is the number of bytes actually delivered by the read above.
       decompressed.insert(decompressed.end(), buf, buf + is.gcount());
   }
   return decompressed;
}

下面构造一个 60 字节的简易迷你炸弹，它解压后会膨胀为 20 KiB 的 NUL 字符：

// Demo: feeds a tiny gzip "bomb" through unzip() with two different limits
// and prints how many bytes each call returned.
int main() {
    // 60-byte gzip member. Read against the gzip format (RFC 1952): magic
    // 1f 8b, method 08 (deflate), FNAME flag, the name "bomb", a deflate
    // stream, then the CRC32 and the ISIZE trailer 00 50 00 00 (= 20480).
    static unsigned char const raw[] = {
        0x1f, 0x8b, 0x08, 0x08, 0xd1, 0x6d, 0x0e, 0x5b, 0x00, 0x03, 0x62, 0x6f,
        0x6d, 0x62, 0x00, 0xed, 0xc1, 0x31, 0x01, 0x00, 0x00, 0x00, 0xc2, 0xa0,
        0xf5, 0x4f, 0x6d, 0x0a, 0x3f, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x80, 0xb7, 0x01, 0x60, 0x83, 0xbc, 0xe6, 0x00, 0x50, 0x00, 0x00
    };

    // Each byte is converted to char while being copied into the vector.
    std::vector<char> const bomb(raw, raw + sizeof(raw));

    auto max10k  = unzip(10*1024, bomb);
    auto max100k = unzip(100*1024, bomb);

    std::cout << "max10k:  " << max10k.size() << " bytes\n"
              << "max100k: " << max100k.size() << " bytes\n";
}

输出（Live On Coliru）：

max10k:  10240 bytes
max100k: 20480 bytes

抛出异常

当然，你也可以选择在超出限制时抛出异常：

/// Decompresses an in-memory gzip buffer, refusing to produce more than
/// `limit` bytes.
///
/// @param limit       Maximum number of decompressed bytes that is accepted.
/// @param compressed  Gzip-compressed input bytes.
/// @return The complete decompressed data (at most `limit` bytes).
/// @throws std::runtime_error if the decompressed data would be larger than
///         `limit`.
std::vector<char> unzip(size_t limit, std::vector<char> const& compressed) {
   std::vector<char> decompressed;

   boost::iostreams::filtering_istream is;

   is.push(boost::iostreams::gzip_decompressor());
   is.push(boost::iostreams::array_source(compressed.data(), compressed.size()));

   while (is) {
       char buf[512];
       is.read(buf, sizeof(buf)); // can't detect EOF before attempting read on some streams

       std::streamsize const got = is.gcount();

       // Output of exactly `limit` bytes is allowed; only going past it is an
       // error. decompressed.size() never exceeds `limit` here, so the
       // subtraction cannot wrap and no addition can overflow.
       if (static_cast<size_t>(got) > limit - decompressed.size())
           throw std::runtime_error("unzip limit exceeded");

       decompressed.insert(decompressed.end(), buf, buf + got);
   }
   return decompressed;
}

Answer 2

正如 schorsch_76 刚才所说，我可以编写一个自定义的 back_inserter，于是我就写了一个，并且它确实可行：

namespace boost {
namespace iostreams {

/// Sink device that appends everything written to it to a container, and
/// throws instead of letting the container grow past a fixed maximum size.
///
/// The device keeps only a pointer to the container; the container is not
/// copied.
template<typename Container>
class limit_back_insert_device {
public:
    using char_type = typename Container::value_type;
    using category  = sink_tag;

    /// @param cnt       Container that receives the written characters.
    /// @param max_size  Largest size `cnt` is allowed to reach.
    /// @throws std::runtime_error if `cnt` is already larger than `max_size`.
    limit_back_insert_device(Container& cnt, size_t max_size)
        : container(&cnt)
        , max_size(max_size)
    {
        // Reject a container that is over the limit before anything is written.
        ensure_room(0);
    }

    /// Appends `n` characters starting at `s` to the container.
    ///
    /// @return `n`; the whole range is either appended or rejected.
    /// @throws std::runtime_error if the append would exceed the limit.
    std::streamsize write(const char_type* s, std::streamsize n) {
        ensure_room(n);
        const char_type* const last = s + n;
        container->insert(container->end(), s, last);
        return n;
    }

private:
    /// Throws unless `extra` more elements fit within `max_size`.
    void ensure_room(size_t extra) {
        const auto current = container->size();

        // First make sure `current + extra` itself cannot wrap around.
        const size_t headroom = std::numeric_limits<size_t>::max() - extra;
        if (headroom < current) {
            throw std::runtime_error("size_t overflow");
        }

        const auto wanted = current + extra;
        if (wanted > max_size) {
            throw std::runtime_error("container->size() > max_size");
        }
    }

protected:
    Container * container;   // target container (not owned)
    size_t const max_size;   // inclusive upper bound on container->size()
};

/// Convenience factory that deduces `Container` from its argument.
template<typename Container>
limit_back_insert_device<Container>
limit_back_inserter(Container& cnt, size_t max_size) {
    return limit_back_insert_device<Container>(cnt, max_size);
}

} // namespace iostreams
} // namespace boost

如何使用 boost::iostreams 防止 zip 炸弹

2 个答案:

抛出异常