我为 Boost.Iostreams 编写了一个简单的过滤器,用于检测文件是 gzip 压缩文件还是普通文本;如果是压缩文件,则把读取工作委托给 gzip_decompressor。
问题在于,检测完文件头之后,我需要在输入流中回退(seek back),以便把这些数据重新提供给解压缩器。但有些流不支持回退,会直接抛出异常并导致线程崩溃。
于是我想,那就改用 basic_array_source 把已经读出的两个字符重新喂进去,但是这个源不支持这里所需的 read 调用!
所以这一直有效:
// Multi-character input filter that serves reads either through a gzip
// decompressor or directly from the upstream source, depending on the
// m_is_compressed flag.
//
// NOTE(review): init() is called here but is not defined inside this struct
// in the visible snippet; it is presumably the member template that sets
// m_initialized / m_is_compressed — confirm.
struct gz_decompressor {
    using char_type = char;
    using category = boost::iostreams::multichar_input_filter_tag;

    boost::iostreams::gzip_decompressor m_decompressor{15, backtest::GzReader::GZIP_BUFFER_SIZE};
    bool m_initialized{false};
    bool m_is_compressed{false};

    // Reads up to n characters into s, running the one-time initialization
    // on the first call. Returns whatever the selected reader returns.
    template<typename Source>
    std::streamsize read(Source& src, char* s, std::streamsize n) {
        if (!m_initialized) {
            init(src, s, n);
        }
        // Plain data: pass the request straight through to the source.
        if (!m_is_compressed) {
            return boost::iostreams::read(src, s, n);
        }
        // Compressed data: let the gzip filter pull from the source.
        return m_decompressor.read(src, s, n);
    }
};
我不知道的部分是:
// One-time format detection: pulls the first two characters from src and
// compares them with the gzip magic bytes 0x1f 0x8b, then tries to replay
// those two characters from an in-memory array source.
//
// NOTE(review): the values returned by the two replay reads below are
// discarded, so the caller cannot tell how many characters were written to s.
// NOTE(review): the results of get() are stored without checking for
// end-of-stream — confirm the source always holds at least two characters.
template<typename Source>
void init(Source& src, char* s, std::streamsize n) {
    char header[2];
    header[0] = boost::iostreams::get(src);
    header[1] = boost::iostreams::get(src);
    // Compare both header bytes; the second magic byte lives at index 1
    // (index 2 would be past the end of the two-element array).
    m_is_compressed = header[0] == static_cast<char>(0x1f) && header[1] == static_cast<char>(0x8b);
    m_initialized = true;
    boost::iostreams::basic_array_source<char> source(header);
    if (m_is_compressed) {
        m_decompressor.read(source, s, n); // Nope, is not allowed!
    }
    else {
        boost::iostreams::read(source, s, n);
    }
}
有没有什么思路可以正确地实现这一点,也就是说,不需要在流中回退(seek back)?
答案 0 :(得分:0)
通过重用gzip_decompressor
使用的代码(peekable_source
),我得到了一个不完善的解决方案:
using namespace boost::iostreams;
// Source adapter that hands out characters from an in-memory put-back buffer
// first and only then reads from the wrapped source. Holds the wrapped source
// by reference, so it must not outlive it.
template<typename Source>
struct PeekableSource {
    using char_type = char;
    struct category : source_tag, peekable_tag { };

    // Wraps src; putback is the initial content of the put-back buffer.
    explicit PeekableSource(Source& src, const std::string& putback = "")
        : src_(src), putback_(putback), offset_(0)
    { }

    // Reads up to n characters into s. Returns the number of characters
    // delivered, or -1 when nothing was buffered and the wrapped source
    // reported -1.
    std::streamsize read(char* s, std::streamsize n)
    {
        std::streamsize copied = 0;

        // Serve pending put-back characters first.
        const std::streamsize buffered =
            static_cast<std::streamsize>(putback_.size());
        if (offset_ < buffered) {
            copied = (std::min)(n, buffered - offset_);
            BOOST_IOSTREAMS_CHAR_TRAITS(char)::copy(
                s, putback_.data() + offset_, copied);
            offset_ += copied;
            if (copied == n) {
                return copied;
            }
        }

        // Fill the remainder of the request from the wrapped source.
        const std::streamsize fetched =
            boost::iostreams::read(src_, s + copied, n - copied);
        if (fetched != -1) {
            return copied + fetched;
        }
        return copied ? copied : -1;
    }

    // Puts a single character back in front of the read position. Only
    // possible while previously consumed buffer slots exist; otherwise a
    // bad_putback exception is raised.
    bool putback(char c)
    {
        if (!offset_) {
            boost::throw_exception(
                boost::iostreams::detail::bad_putback());
        } else {
            putback_[--offset_] = c;
        }
        return true;
    }

    // Replaces the already-consumed prefix of the buffer with s and rewinds
    // the read position to the start of the buffer.
    void putback(const std::string& s)
    {
        putback_.replace(0, offset_, s);
        offset_ = 0;
    }

    // Returns true if some characters have been put back but not re-read.
    bool has_unconsumed_input() const
    {
        return static_cast<std::streamsize>(putback_.size()) > offset_;
    }

    // Returns the sequence of characters that have been put back but not
    // re-read.
    std::string unconsumed_input() const
    {
        return putback_.substr(offset_);
    }

    Source& src_;
    std::string putback_;
    std::streamsize offset_;
};
// Multi-character input filter that inspects the first two bytes of the
// stream and, when they match the gzip magic bytes 0x1f 0x8b, routes all
// reads through a gzip_decompressor; otherwise data is passed through as-is.
// The inspected bytes are replayed through a PeekableSource, so the upstream
// source never has to seek backwards.
struct gzDecompressor {
    typedef char char_type;
    typedef multichar_input_filter_tag category;

    gzip_decompressor m_decompressor;
    bool m_initialized{false};
    bool m_is_compressed{false};
    // Characters that were put back but not yet re-read; carried from one
    // read() call to the next.
    std::string m_putback;

    // Reads at most two header characters from src, decides whether the
    // stream is gzip data, and puts the characters back so the next read
    // sees them again. Source must provide putback(const std::string&).
    template<typename Source>
    void init(Source& src) {
        std::string data;
        // Read one character at a time and stop at the first read that does
        // not deliver a character, so an empty or one-byte input does not
        // inject an end-of-stream marker into the data.
        while (data.size() < 2) {
            char c;
            if (boost::iostreams::read(src, &c, 1) != 1) {
                break;
            }
            data.push_back(c);
        }
        m_is_compressed = data.size() == 2
            && data[0] == static_cast<char>(0x1f)
            && data[1] == static_cast<char>(0x8b);
        src.putback(data);
        m_initialized = true;
    }

    // Reads up to n characters into s, either decompressed or verbatim.
    // Returns the value reported by the selected reader.
    template<typename Source>
    std::streamsize read(Source& src, char* s, std::streamsize n) {
        PeekableSource<Source> peek(src, m_putback);
        if (!m_initialized) {
            init(peek);
        }
        const std::streamsize result = m_is_compressed
            ? m_decompressor.read(peek, s, n)
            : boost::iostreams::read(peek, s, n);
        // peek is a temporary: keep whatever it still buffers, otherwise
        // header bytes not consumed by this call would be lost (and bytes
        // already consumed would be replayed on the next call).
        m_putback = peek.unconsumed_input();
        return result;
    }
};
这个方案并不理想,因为现在有两层可能缓存数据的中间源;不过至少大部分工作是通过 read 接口批量完成的,而不是逐字节进行,因此性能方面的顾虑应该会小一些。