我正在尝试解析 terminfo 定义文本文件。我是 Boost.Spirit 的新手,所以先从一个简单的语法开始,只解析注释行、空行和终端定义。正如语法中的代码注释所示,在 definition 规则中取消对 [_val = _1] 的注释会导致编译失败。为什么?我该如何解决?
如果我忽略实际的terminfo文件,我希望下面的代码能够解析这种文本:
# comment line
first definition line
second
third line
# another comment line
代码:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_eol.hpp>
#include <boost/spirit/include/qi_eoi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <vector>
#include <iostream>
#include <string>
namespace termcxx
{
namespace parser
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace px = boost::phoenix;
//using qi::double_;
using ascii::space;
//using px::ref;
using px::construct;
//using qi::eps;
//using qi::lit;
using qi::_val;
using qi::_1;
using ascii::char_;
using qi::eol;
using qi::eoi;
/// Placeholder attribute type produced by the grammar's start rule.
///
/// It carries a single integer so that it can be adapted as a Fusion
/// sequence. The vector-taking constructors accept and ignore their
/// arguments.
struct context
{
    // Default member initializer: the user-provided constructors below do not
    // mention `dummy`, so without it they would leave the member
    // indeterminate.
    int dummy = 0;

    context () = default;
    context (context const &) = default;

    /// Accepts one character sequence; the argument is ignored.
    context (std::vector<char> a)
    { }

    /// Accepts two character sequences; the arguments are ignored.
    context (std::vector<char> a, std::vector<char> b)
    { }
};
} }
BOOST_FUSION_ADAPT_STRUCT(
termcxx::parser::context,
(int, dummy))
namespace termcxx
{
namespace parser
{
// Spirit.Qi grammar from the question: a line-oriented parser that accepts any
// number of comment lines, empty lines and definitions, and exposes a
// `context` attribute through its `start` rule.
//
// This listing is the problematic version under discussion; it is kept
// verbatim. Review notes below flag the points raised against it.
template <typename Iterator>
struct parser
: qi::grammar<Iterator, context()>
{
// Comment line: optional whitespace, '#', everything up to the line end, then
// eol or end of input.
// NOTE(review): the rule's attribute is written as `std::vector<char>` rather
// than in signature form `std::vector<char>()` (compare `context()` on the
// start rule) -- presumably unintended; the same applies to the four rules
// that follow.
// NOTE(review): `space` is ascii::space here, which presumably also matches
// line-end characters; `blank` may be what is wanted -- confirm.
qi::rule<Iterator, std::vector<char> > comment_line
= (*space >> '#' >> *(char_ - eol) >> (eol | eoi))[_val = _1]
;
// Empty line: optional whitespace followed by eol or end of input.
// NOTE(review): at end of input this can succeed without consuming anything,
// so the Kleene star in `start` may never terminate -- confirm.
qi::rule<Iterator, std::vector<char> > empty_line
= (*space >> (eol | eoi))[_val = _1]
;
// First line of a definition: one or more non-eol characters, then eol or end
// of input. `char_` also accepts whitespace, so nothing here requires the line
// to start in column 0.
qi::rule<Iterator, std::vector<char> > def_first_line
= (+(char_ - eol) >> (eol | eoi))[_val = _1]
;
// Continuation line of a definition: leading whitespace, one or more non-eol
// characters, then eol or end of input.
qi::rule<Iterator, std::vector<char> > def_subsequent_line
= (+space >> +(char_ - eol) >> (eol | eoi))[_val = _1]
;
// A definition: a first line followed by zero or more continuation lines.
// NOTE(review): the synthesized attribute of this sequence is presumably a
// container of lines rather than a flat std::vector<char>, which would explain
// why enabling the `[_val = _1]` action does not compile -- confirm against
// the Spirit compound attribute rules.
qi::rule<Iterator, std::vector<char> > definition
= (def_first_line >> *def_subsequent_line)//[_val = _1] // Uncommenting the [_val = _1] breaks compilation. Why?
;
// Entry rule: any mix of comment lines, empty lines and definitions. The
// semantic action assigns a default-constructed context; the attributes of the
// sub-rules are not used.
qi::rule<Iterator, context()> start
= (*(comment_line
| empty_line
| definition))[_val = construct<context> ()]
;
// Registers `start` as the grammar's entry rule.
parser()
: parser::base_type(start)
{ }
};
// Explicit instantiation for std::string iterators, so the grammar is compiled
// in this translation unit.
template struct parser<std::string::iterator>;
} // namespace parser
} // namespace termcxx
答案 0 :(得分:5)
为什么要坚持指定 [_val=_1]?它是多余的,因为默认的属性传播本来就会这样做。事实上它反而有害,见下文。
接下来,(def_first_line >> *def_subsequent_line)
的属性类型(显然)与std::vector<char>
不兼容。也许你可以:
- 使用 raw[] 获取完整匹配的输入;
- 定义 BOOST_SPIRIT_ACTIONS_ALLOW_ATTR_COMPAT(我不确定这是否得到很好的支持);
- 另外,将 std::vector<char> 替换为 std::string。
更新
还有一些问题:
您拼错了大多数规则的属性类型(缺少()
):
qi::rule<Iterator, std::string()> comment_line;
qi::rule<Iterator, std::string()> empty_line;
qi::rule<Iterator, std::string()> def_first_line;
qi::rule<Iterator, std::string()> def_subsequent_line;
qi::rule<Iterator, std::string()> definition;
empty_line 中对 eoi 的匹配会在输入结束时导致无限循环(它可以在不消耗任何输入的情况下反复匹配成功)。
char_ 也会接受空白字符(行首字符请改用 graph)。
def_first_line = graph >> +(char_ - eol) >> (eol|eoi);
使用 qi::space 还会吃掉行尾符!请改用 qi::blank。
青睐可靠性:
empty_line = *blank >> eol;
comment_line = *blank >> '#' >> *(char_ - eol) >> (eol|eoi);
def_first_line = graph >> +(char_ - eol) >> (eol|eoi);
def_subsequent_line = +blank >> +(char_ - eol) >> (eol|eoi);
definition = (def_first_line >> *def_subsequent_line);
start = (
*(comment_line | empty_line | definition)
) [ _val = px::construct<context>() ]
;
与Spirit合作时,这种简单的习惯可以为您节省数小时的工作和理智。
您可以稍微简化包含
这是一个固定版本 Live On Coliru ,带有输出:
<start>
<try># comment line\n\nfirs</try>
<comment_line>
<try># comment line\n\nfirs</try>
<success>\nfirst definition li</success>
<attributes>[[ , c, o, m, m, e, n, t, , l, i, n, e]]</attributes>
</comment_line>
<comment_line>
<try>\nfirst definition li</try>
<fail/>
</comment_line>
<empty_line>
<try>\nfirst definition li</try>
<success>first definition lin</success>
<attributes>[[]]</attributes>
</empty_line>
<comment_line>
<try>first definition lin</try>
<fail/>
</comment_line>
<empty_line>
<try>first definition lin</try>
<fail/>
</empty_line>
<definition>
<try>first definition lin</try>
<def_first_line>
<try>first definition lin</try>
<success> second \n third li</success>
<attributes>[[f, i, r, s, t, , d, e, f, i, n, i, t, i, o, n, , l, i, n, e]]</attributes>
</def_first_line>
<def_subsequent_line>
<try> second \n third li</try>
<success> third line\n\n# anot</success>
<attributes>[[f, i, r, s, t, , d, e, f, i, n, i, t, i, o, n, , l, i, n, e, , , s, e, c, o, n, d, ]]</attributes>
</def_subsequent_line>
<def_subsequent_line>
<try> third line\n\n# anot</try>
<success>\n# another comment l</success>
<attributes>[[f, i, r, s, t, , d, e, f, i, n, i, t, i, o, n, , l, i, n, e, , , s, e, c, o, n, d, , , , t, h, i, r, d, , l, i, n, e]]</attributes>
</def_subsequent_line>
<def_subsequent_line>
<try>\n# another comment l</try>
<fail/>
</def_subsequent_line>
<success>\n# another comment l</success>
<attributes>[[f, i, r, s, t, , d, e, f, i, n, i, t, i, o, n, , l, i, n, e, , , s, e, c, o, n, d, , , , t, h, i, r, d, , l, i, n, e]]</attributes>
</definition>
<comment_line>
<try>\n# another comment l</try>
<fail/>
</comment_line>
<empty_line>
<try>\n# another comment l</try>
<success># another comment li</success>
<attributes>[[]]</attributes>
</empty_line>
<comment_line>
<try># another comment li</try>
<success></success>
<attributes>[[ , a, n, o, t, h, e, r, , c, o, m, m, e, n, t, , l, i, n, e, !]]</attributes>
</comment_line>
<comment_line>
<try></try>
<fail/>
</comment_line>
<empty_line>
<try></try>
<fail/>
</empty_line>
<definition>
<try></try>
<def_first_line>
<try></try>
<fail/>
</def_first_line>
<fail/>
</definition>
<success></success>
<attributes>[]</attributes>
</start>
Success
完整的参考代码:
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <vector>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace termcxx { namespace parser {
namespace ascii = boost::spirit::ascii;
namespace px = boost::phoenix;
//using qi::double_;
using ascii::blank;
//using px::ref;
using px::construct;
//using qi::eps;
//using qi::lit;
using qi::_val;
using qi::_1;
using ascii::char_;
using ascii::graph;
using qi::eol;
using qi::eoi;
/// Placeholder attribute type produced by the grammar's start rule.
///
/// It carries a single integer so that it can be adapted as a Fusion
/// sequence. The vector-taking constructors accept and ignore their
/// arguments.
struct context
{
    // Default member initializer: the user-provided constructors below do not
    // mention `dummy`, so without it they would leave the member
    // indeterminate.
    int dummy = 0;

    context () = default;
    context (context const &) = default;

    /// Accepts one character sequence; the argument is ignored.
    context (std::vector<char> a) { }

    /// Accepts two character sequences; the arguments are ignored.
    context (std::vector<char> a, std::vector<char> b) { }
};
} }
BOOST_FUSION_ADAPT_STRUCT(termcxx::parser::context, (int, dummy))
namespace termcxx { namespace parser {
/// Line-oriented Spirit.Qi grammar: accepts any number of comment lines,
/// empty lines and definitions, and exposes a `context` attribute.
///
/// Rules are assigned inside the constructor (not as member initializers) so
/// they can be registered for BOOST_SPIRIT_DEBUG tracing in the same place.
///
/// @tparam Iterator  iterator type of the input being parsed.
template <typename Iterator>
struct parser : qi::grammar<Iterator, context()>
{
    parser() : parser::base_type(start)
    {
        // Only blanks, then a real line end. Requiring `eol` (not `eoi`)
        // means this rule cannot succeed without consuming input, so the
        // Kleene star in `start` always makes progress.
        empty_line          = *blank >> eol;

        // Optional blanks, '#', the rest of the line, then eol or end of input.
        comment_line        = *blank >> '#' >> *(char_ - eol) >> (eol|eoi);

        // First line of a definition: must begin with a graphic (non-blank)
        // character. The remainder may be empty, so a one-character line is
        // still a valid first line.
        def_first_line      = graph >> *(char_ - eol) >> (eol|eoi);

        // Continuation line: at least one leading blank, then content.
        def_subsequent_line = +blank >> +(char_ - eol) >> (eol|eoi);

        // A definition is a first line plus any number of continuation lines;
        // all characters accumulate into one std::string attribute.
        definition          = (def_first_line >> *def_subsequent_line);

        // The semantic action assigns a default-constructed context; the
        // attributes of the sub-rules are not used.
        start = (
                *(comment_line | empty_line | definition)
            ) [ _val = px::construct<context>() ]
            ;

        BOOST_SPIRIT_DEBUG_NODES((start)(def_first_line)(def_subsequent_line)(definition)(empty_line)(comment_line))
    }

  private:
    qi::rule<Iterator, context()> start;
    qi::rule<Iterator, std::string()> comment_line;
    qi::rule<Iterator, std::string()> empty_line;
    qi::rule<Iterator, std::string()> def_first_line;
    qi::rule<Iterator, std::string()> def_subsequent_line;
    qi::rule<Iterator, std::string()> definition;
};
} }
int main()
{
using It = boost::spirit::istream_iterator;
termcxx::parser::parser<It> g;
It f(std::cin >> std::noskipws), l;
termcxx::parser::context data;
if (qi::parse(f,l,g,data))
std::cout << "Success\n";
else
std::cout << "Failure\n";
if (f != l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
答案 1 :(得分:2)
让我们看看这一行究竟发生了什么:
qi::rule<Iterator, std::vector<char> > definition
= (def_first_line >> *def_subsequent_line)[_val = _1];
;
def_first_line
是一条规则。它的属性是一个
std::vector<char>
。 def_subsequent_line
是另一条规则。再次
其属性为std::vector<char>
。* def_subsequent_line
是通过将kleene运算符*
应用于def_subsequent_line
而获得的解析器。它的隐含属性是vector< std::vector<char> >
。(def_first_line >> *def_subsequent_line)
。这是另一个解析器。根据 Spirit 的复合属性规则,其隐式属性同样为vector< std::vector<char> >
。基本上,该行应为:
qi::rule<Iterator, std::vector<std::vector<char> > > definition
= (def_first_line >> *def_subsequent_line)[_val = _1];
;
这是有道理的,不是吗?您希望单独获取每一行,而不是将所有字符放在同一向量中。
现在,正如旁注:
[_val = _1]
并非真的有必要。您应该在语法构造函数中使用运算符 %= 来初始化规则,该运算符会负责隐式属性的传播。
您还可以定义一个 skipper 规则,让它自动处理空白和注释,然后将该规则与 phrase_parse 一起使用。
最后,可以使用 std::string 代替 vector<char>;Spirit 足够聪明,能够理解字符序列就是一个字符串。