我想使用Boost.Spirit.Lex来提取二进制文件;为此我编写了以下程序(这是一个摘录):
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <fstream>
#include <iterator>
#include <string>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
#define X 1
#define Y 2
#define Z 3
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
word_count_tokens () {
this->self.add
("[^ \t\n]+", X)
("\n", Y)
(".", Z);
}
};
class counter
{
public:
typedef bool result_type;
template<typename T>
bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
switch (t.id ()) {
case X:
++w; c += t.value ().size ();
break;
case Y:
++l; ++c;
break;
case Z:
++c;
break;
}
return true;
}
};
int main (int argc, char **argv)
{
std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
auto first = spirit::make_default_multi_pass (std::istream_iterator<char> (ifs));
auto last = spirit::make_default_multi_pass (std::istream_iterator<char> ());
size_t w, c, l;
word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
w = c = l = 0;
bool r = lex::tokenize (first, last, word_count_functor, boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
ifs.close ();
if (r) {
std::cout << l << ", " << w << ", " << c << std::endl;
}
return 0;
}
构建返回以下错误:
lexer.hpp:390:46: error: non-const lvalue reference to type 'const char *' cannot bind to a value of unrelated type
现在,错误是由具体词法分析器lex::lexer<>
的定义引起的;实际上它的第一个参数默认为const char *
。如果我使用spirit::istream_iterator
或spirit::make_default_multi_pass (.....)
,我也会收到同样的错误
但是,如果我指定lex::lexer<>
的正确模板参数,我会收到大量错误!
解决方案?
更新
我已经推出了所有源文件;这是word_counter网站的例子。
答案 0 :(得分:2)
我认为真正的问题没有显示出来。您没有显示first
或last
,我觉得您可能会有临时工作。
这是我提出的一个验证样本,或许你可以看到它在做什么 - 错误---不同:)
const char*
)spirit::istream_iterator
)#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#ifdef MEMORY_MAPPED
# include <boost/iostreams/device/mapped_file.hpp>
#endif
namespace /*anon*/
{
namespace qi =boost::spirit::qi;
namespace lex=boost::spirit::lex;
template <typename Lexer>
struct mylexer_t : lex::lexer<Lexer>
{
mylexer_t()
{
fileheader = "hello";
this->self = fileheader
| space [ lex::_pass = lex::pass_flags::pass_ignore ];
}
lex::token_def<lex::omit>
fileheader, space;
};
template <typename Iterator> struct my_grammar_t
: public qi::grammar<Iterator>
{
template <typename TokenDef>
my_grammar_t(TokenDef const& tok)
: my_grammar_t::base_type(header)
{
header = tok.fileheader;
BOOST_SPIRIT_DEBUG_NODE(header);
}
private:
qi::rule<Iterator> header;
};
}
namespace /* */ {
std::string safechar(char ch) {
switch (ch) {
case '\t': return "\\t"; break;
case '\0': return "\\0"; break;
case '\r': return "\\r"; break;
case '\n': return "\\n"; break;
}
return std::string(1, ch);
}
template <typename It>
std::string showtoken(const boost::iterator_range<It>& range)
{
std::ostringstream oss;
oss << '[';
std::transform(range.begin(), range.end(), std::ostream_iterator<std::string>(oss), safechar);
oss << ']';
return oss.str();
}
}
bool parsefile(const std::string& spec)
{
#ifdef MEMORY_MAPPED
typedef char const* It;
boost::iostreams::mapped_file mmap(spec.c_str(), boost::iostreams::mapped_file::readonly);
char const *first = mmap.const_data();
char const *last = first + mmap.size();
#else
typedef char const* It;
std::ifstream in(spec.c_str());
in.unsetf(std::ios::skipws);
std::string v(std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>());
It first = &v[0];
It last = first+v.size();
#endif
typedef lex::lexertl::token<It /*, boost::mpl::vector<char, unsigned int, std::string> */> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef mylexer_t<lexer_type>::iterator_type iterator_type;
try
{
static mylexer_t<lexer_type> mylexer;
static my_grammar_t<iterator_type> parser(mylexer);
auto iter = mylexer.begin(first, last);
auto end = mylexer.end();
bool r = qi::parse(iter, end, parser);
r = r && (iter == end);
if (!r)
std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n";
return r;
}
catch (const qi::expectation_failure<iterator_type>& e)
{
std::cerr << "FIXME: expected " << e.what_ << ", got '";
for (auto it=e.first; it!=e.last; it++)
std::cerr << showtoken(it->value());
std::cerr << "'" << std::endl;
return false;
}
}
int main()
{
if (parsefile("input.bin"))
return 0;
return 1;
}
对于变体:
typedef boost::spirit::istream_iterator It;
std::ifstream in(spec.c_str());
in.unsetf(std::ios::skipws);
It first(in), last;
答案 1 :(得分:2)
好的,既然问题已经改变,这里有一个新答案,用完整的代码示例解决了一些问题。
首先,您需要使用自定义令牌类型。即
word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor;
// instead of:
// word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
显然,习惯使用typedef lex::lexertl::token<boost::spirit::istream_iterator>
您需要使用min_token_id
而不是令牌ID 1,2,3。另外,请将其作为易于维护的枚举:
enum token_ids {
X = lex::min_token_id + 1,
Y,
Z,
};
您不能再在默认令牌.size()
上使用value()
,因为迭代器范围不再是RandomAccessRange。相反,请使用专门用于boost::distance()
的{{1}}:
iterator_range
合并这些修补程序: Live On Coliru
++w; c += boost::distance(t.value()); // t.value ().size ();
自行运行时,打印
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/bind.hpp>
#include <fstream>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
enum token_ids {
X = lex::min_token_id + 1,
Y,
Z,
};
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
word_count_tokens () {
this->self.add
("[^ \t\n]+", X)
("\n" , Y)
("." , Z);
}
};
struct counter
{
typedef bool result_type;
template<typename T>
bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
switch (t.id ()) {
case X:
++w; c += boost::distance(t.value()); // t.value ().size ();
break;
case Y:
++l; ++c;
break;
case Z:
++c;
break;
}
return true;
}
};
int main (int argc, char **argv)
{
std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
ifs >> std::noskipws;
boost::spirit::istream_iterator first(ifs), last;
word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor;
size_t w = 0, c = 0, l = 0;
bool r = lex::tokenize (first, last, word_count_functor,
boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
ifs.close ();
if (r) {
std::cout << l << ", " << w << ", " << c << std::endl;
}
}