我写了一些这样的代码:
/// Decompresses a gzip buffer by writing it through a gzip_decompressor filter
/// into a growing std::vector<char>.
///
/// @param compressed  Bytes of the gzip stream; may be empty.
/// @return            The bytes produced by the decompressor.
///
/// NOTE: the output is appended without any upper bound, so a hostile input
/// (a "zip bomb") can make `decompressed` grow until allocation fails.
std::vector<char> unzip(std::vector<char> const& compressed)
{
    std::vector<char> decompressed;

    boost::iostreams::filtering_ostream os;
    os.push(boost::iostreams::gzip_decompressor());
    os.push(boost::iostreams::back_inserter(decompressed));

    // data() is valid on an empty vector, whereas &compressed[0] indexes a
    // non-existent element and is undefined behaviour when the input is empty.
    boost::iostreams::write(os, compressed.data(),
                            static_cast<std::streamsize>(compressed.size()));

    // Tear the chain down before returning so that buffered output is pushed
    // into `decompressed`.
    os.reset();
    return decompressed;
}
如果 compressed 是一个 zip 炸弹,会发生什么?我认为内存会被耗尽,进程也会崩溃。
那么如何避免这种情况?如何在解压缩之前检查解压后的原始数据大小?
答案 0 :(得分:2)
做法和平常一样:在解压缩的过程中留意输出的大小。
您可以使用容量固定/有限的缓冲区(例如使用 boost::iostreams::array_sink),也可以给复制操作加一层检查,以限制最大大小。
此外,在您的示例中,输入是内存缓冲区,因此用设备而不是流来处理输入更合理。下面是一个简单的实现:
/// Decompresses a gzip buffer, producing at most `limit` bytes of output.
///
/// Output beyond `limit` is silently dropped: the function stops reading once
/// the budget is used up and returns what it has collected so far.
///
/// @param limit       Maximum number of decompressed bytes to return.
/// @param compressed  Bytes of the gzip stream.
/// @return            The decompressed data, never longer than `limit`.
std::vector<char> unzip(size_t limit, std::vector<char> const& compressed) {
    std::vector<char> decompressed;

    boost::iostreams::filtering_istream is;
    is.push(boost::iostreams::gzip_decompressor());
    is.push(boost::iostreams::array_source(compressed.data(), compressed.size()));

    while (is && (decompressed.size() < limit)) {
        char buf[512];

        // Ask only for what still fits in the budget. Reading a full chunk
        // every time would let the result overshoot `limit` by up to
        // sizeof(buf) - 1 bytes whenever `limit` is not a multiple of 512.
        size_t const remaining = limit - decompressed.size();
        size_t const want = remaining < sizeof(buf) ? remaining : sizeof(buf);

        is.read(buf, static_cast<std::streamsize>(want));

        // gcount() is the number of bytes the last read actually delivered;
        // it can be smaller than `want` when the stream ends.
        decompressed.insert(decompressed.end(), buf, buf + is.gcount());
    }
    return decompressed;
}
用一个 60 字节的简单迷你炸弹来测试,它解压后会膨胀为 20 KiB 的 NUL 字符:
// Demo driver: runs unzip() on a small gzip "bomb" with two different output
// limits and prints how many bytes each call returned.
int main() {
    // The 60-byte gzip stream, kept as raw byte values and converted to char
    // when the vector is built.
    static unsigned char const payload[] = {
        0x1f, 0x8b, 0x08, 0x08, 0xd1, 0x6d, 0x0e, 0x5b, 0x00, 0x03, 0x62, 0x6f,
        0x6d, 0x62, 0x00, 0xed, 0xc1, 0x31, 0x01, 0x00, 0x00, 0x00, 0xc2, 0xa0,
        0xf5, 0x4f, 0x6d, 0x0a, 0x3f, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x80, 0xb7, 0x01, 0x60, 0x83, 0xbc, 0xe6, 0x00, 0x50, 0x00, 0x00
    };
    std::vector<char> const bomb(payload, payload + sizeof(payload));

    // Decompress under a 10 KiB cap first, then under a 100 KiB cap.
    std::vector<char> const capped_small = unzip(10 * 1024, bomb);
    std::vector<char> const capped_large = unzip(100 * 1024, bomb);

    std::cout << "max10k: " << capped_small.size() << " bytes\n";
    std::cout << "max100k: " << capped_large.size() << " bytes\n";
}
max10k: 10240 bytes
max100k: 20480 bytes
当然,你也可以选择在超出限制时抛出异常:
/// Decompresses a gzip buffer and refuses to produce more than `limit` bytes.
///
/// @param limit       Maximum number of decompressed bytes that is accepted.
/// @param compressed  Bytes of the gzip stream.
/// @return            The complete decompressed data (size <= `limit`).
/// @throws std::runtime_error  if the decompressed data would be larger than
///                             `limit`.
std::vector<char> unzip(size_t limit, std::vector<char> const& compressed) {
    std::vector<char> decompressed;

    boost::iostreams::filtering_istream is;
    is.push(boost::iostreams::gzip_decompressor());
    is.push(boost::iostreams::array_source(compressed.data(), compressed.size()));

    while (is) {
        char buf[512];
        is.read(buf, sizeof(buf)); // can't detect EOF before attempting read on some streams

        size_t const got = static_cast<size_t>(is.gcount());

        // Throw only when the limit is actually exceeded; output of exactly
        // `limit` bytes is still acceptable. decompressed.size() never goes
        // above `limit`, so the subtraction cannot wrap, and comparing this
        // way avoids overflowing size() + got.
        if (got > limit - decompressed.size())
            throw std::runtime_error("unzip limit exceeded");

        decompressed.insert(decompressed.end(), buf, buf + got);
    }
    return decompressed;
}
答案 1 :(得分:1)
schorsch_76 提到我可以写一个自定义的 back_inserter,于是我就写了一个,并且它可以正常工作:
namespace boost {
namespace iostreams {

/// Sink device that appends written characters to a container and throws
/// instead of letting the container grow past a fixed maximum size.
template<typename Container>
class limit_back_insert_device {
public:
    using char_type = typename Container::value_type;
    using category = sink_tag;

    /// Binds the device to `cnt` and records the size cap.
    /// @throws std::runtime_error if `cnt` is already larger than `max_size`.
    limit_back_insert_device(Container& cnt, size_t max_size)
        : container(&cnt), max_size(max_size)
    {
        check(0);
    }

    /// Appends the `n` characters starting at `s` to the container.
    /// @return `n`.
    /// @throws std::runtime_error if the append would push the container
    ///         past the cap (nothing is appended in that case).
    std::streamsize write(const char_type* s, std::streamsize n) {
        check(n);
        const char_type* const last = s + n;
        container->insert(container->end(), s, last);
        return n;
    }

private:
    /// Verifies that `n` more elements still fit; throws otherwise.
    void check(size_t n) {
        size_t const current = container->size();

        // First make sure current + n is representable at all ...
        size_t const headroom = std::numeric_limits<size_t>::max() - n;
        if (headroom < current) {
            throw std::runtime_error("size_t overflow");
        }

        // ... then compare the prospective size against the cap.
        size_t const prospective = current + n;
        if (prospective > max_size) {
            throw std::runtime_error("container->size() > max_size");
        }
    }

protected:
    Container * container;   // target container, not owned
    size_t const max_size;   // largest size the container may reach
};

/// Convenience factory that deduces `Container` from its argument.
template<typename Container>
limit_back_insert_device<Container> limit_back_inserter(Container& cnt,
                                                        size_t max_size) {
    limit_back_insert_device<Container> device(cnt, max_size);
    return device;
}

} // namespace iostreams
} // namespace boost