这是Using boost::spirit::qi to parse numbers with separators的后续问题。
根据sehe非常好的建议,我设法让数字解析正常工作了。然后我尝试对其进行扩展:增加一个辅助解析器,用来处理带可选正负号的数字。这第二次尝试失败了。我怀疑是我处理子语法(sub-grammar)的方式有问题。代码如下:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Grammar for an unsigned integer with '_' separators, in binary, octal,
// decimal or hexadecimal notation. The base is selected either by a "0<letter>"
// prefix or by a trailing letter; a number with no marker is parsed as decimal.
template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
unsigned_parser() : unsigned_parser::base_type(start) {
using qi::_val;
using qi::_1;
using qi::eps;
using qi::debug;
using ascii::char_;
// Digit-run rules: reset the result to 0, then fold each digit into it
// (value = value * base + digit); '_' is accepted anywhere and ignored.
// NOTE(review): the Kleene star also accepts a run with no digits at all.
bin = eps[_val=0] >> *(char_("01")[_val = _val * 2 + dval(_1)] | '_');
oct = eps[_val=0] >> *(char_("0-7")[_val = _val * 8 + dval(_1)] | '_');
dec = eps[_val=0]
>> *(char_("0-9")[_val = _val * 10 + dval(_1)] | '_');
hex = eps[_val=0]
>> *(char_("0-9a-fA-F")[_val = _val * 16 + dval(_1)] | '_');
// Prefix forms ('0' followed by a base letter and the digits) are tried first,
// then suffix forms (digits followed by a base letter). The decimal suffix is
// optional, so an unmarked number falls through to the last alternative.
start = (char_('0') >>
((char_("xXhH") >> hex[_val=_1])
| (char_("bByY") >> bin[_val=_1])
| (char_("oOqQ") >> oct[_val=_1])
| (char_("dDtT") >> dec[_val=_1])))
| (hex[_val=_1] >> char_("xXhH"))
| (bin[_val=_1] >> char_("bByY"))
| (oct[_val=_1] >> char_("oOqQ"))
| (dec[_val=_1] >> -char_("dDtT"));
// Name the rules and enable debug output for each of them.
start.name("unum");
hex.name("hex");
oct.name("oct");
dec.name("dec");
bin.name("bin");
debug(start);
debug(hex);
debug(oct);
debug(dec);
debug(bin);
}
qi::rule<Iterator, Num()> start;
qi::rule<Iterator, Num()> hex;
qi::rule<Iterator, Num()> oct;
qi::rule<Iterator, Num()> dec;
qi::rule<Iterator, Num()> bin;
// Function object wrapped as a lazy Phoenix function (dval) that maps a digit
// character to its numeric value.
struct _dval {
template <typename> struct result { typedef uint8_t type; };
template <typename T> uint8_t operator()(T ch) const {
// NOTE(review): `>= '0' || <= '9'` is true for every character, so this
// branch always returns and the code below is never reached; hex letters
// therefore get the wrong value. A range check needs `&&`.
if (ch >= '0' || ch <= '9') {
return ch - '0';
}
ch = std::tolower(ch);
// NOTE(review): same `||` problem as above.
if (ch >= 'a' || ch <= 'f') {
return ch - 'a' + 10;
}
// NOTE(review): no return statement follows the assert.
assert(false);
}
};
boost::phoenix::function<_dval> dval;
};
// Grammar for an integer with an optional leading sign, built on top of
// unsigned_parser: '-' negates the parsed value, an optional '+' leaves it as is.
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
signed_parser() : signed_parser::base_type(start) {
using qi::eps;
using qi::_val;
using qi::_1;
using ascii::char_;
using phoenix::static_cast_;
// NOTE(review): the grammar on the right-hand side is a temporary that is
// destroyed at the end of this statement, while the rule is used later;
// the sub-grammar should be a member with the same lifetime as this object.
unum = unsigned_parser<Iterator, Num>();
start = (char_('-') >> unum[_val=-_1])
| (-char_('+') >> unum[_val=_1]);
unum.name("unum");
start.name("snum");
debug(start);
/* debug(unum); */
}
qi::rule<Iterator, Num()> start;
qi::rule<Iterator, Num()> unum;
};
// Parses the single command-line argument as a signed number and prints the
// result. Returns 0 on success, 1 on a usage error or a parse failure.
// NOTE(review): the parameter names are swapped relative to convention: here
// `argv` is the argument count and `argc` is the argument vector.
int main(int argv, const char *argc[]) {
using phoenix::ref;
using qi::eoi;
using qi::_1;
typedef std::string::const_iterator iter;
signed_parser<iter, int64_t> sp;
int64_t val;
if (argv != 2) {
std::cerr << "Usage: " << argc[0] << " <input>" << std::endl;
return 1;
}
std::string test(argc[1]);
iter i = test.begin();
iter end = test.end();
// The semantic action stores the parsed value in `val`; `eoi` requires that
// the whole input is consumed. ascii::space is passed as the skipper.
bool rv = phrase_parse(i, end, sp[ref(val)=_1] >> eoi, ascii::space);
if (rv) {
assert(i == end);
std::cout << "Succeeded: " << val << std::endl;
return 0;
}
std::cout << "Failed." << std::endl;
return 1;
}
使用signed_parser,每个解析都会失败。此外,如果我取消注释已注释掉的debug(),程序会出现段错误。
我觉得自己已经快要弄明白该如何使用它了,所以非常感谢任何帮助。
答案 0 :(得分:2)
使用这么多单独的规则(qi::rule)会使编译器失去优化解析器的机会。
您不能引用临时语法/规则。你需要有语法实例:
/// Grammar for an integer with an optional leading sign.
///
/// The unsigned sub-grammar is held as a data member so that it lives exactly
/// as long as the rule that refers to it (a rule must not refer to a temporary
/// grammar).
///
/// @tparam Iterator  input iterator type
/// @tparam Num       numeric attribute type produced by the grammar
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(snum) {
        using namespace qi;

        // %= keeps automatic attribute propagation even though a semantic
        // action is present. The action on the '-' branch is what actually
        // negates the value; without it "-5" would be parsed as 5.
        snum %= lit('-')  >> unum [ _val = -_1 ]
              | -lit('+') >> unum
              ;

        BOOST_SPIRIT_DEBUG_NODES((snum))
    }
  private:
    qi::rule<Iterator, Num()> snum;
    unsigned_parser<Iterator, Num> unum;  // sub-grammar instance, owned here
};
这里有一些清理工作:
- argc 和 argv 的名字写反了 :)
- 使用 BOOST_SPIRIT_DEBUG* 宏:
BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));
- 只需要匹配字面量时请使用 lit(),而不是(更糟!)会把匹配到的字符作为属性暴露出来的 char_()
- 优先使用自动属性传播(参见 Boost Spirit: "Semantic actions are evil"?)。例如,规则可以简单得多:
// Sign handling with no semantic actions: the value comes from unum via
// automatic attribute propagation.
// NOTE(review): as written, the '-' alternative matches the sign but nothing
// negates the value.
snum = lit('-') >> unum
| -lit('+') >> unum
;
- 使用 %= 可以在存在语义动作的情况下仍然保留自动属性传播:
// %= assignment: the action on the '-' branch negates the value produced by
// unum; the optional '+' branch has no action.
snum %= lit('-') >> unum [ _val = -_1 ]
| -lit('+') >> unum
;
- 这同样适用于 phrase_parse 调用本身:可以直接把属性的引用作为参数传入,不需要语义动作
- 调用 tolower(ch) 可能更慢(因为你已经知道输入是 ASCII),也可能不正确(如果你的编译器把 char 当作有符号类型,就会发生符号扩展)
更新:你的 dval 函数对象(actor)中有一个相当严重的错误:范围检查写错了!下面是我修正后的版本:
/// Function object that folds one digit character into a running value:
/// accum = accum * base + digit(ch). Accepts '0'-'9', 'a'-'f' and 'A'-'F';
/// any other character triggers an assertion failure.
struct accum_f {
    template <typename...> struct result { typedef void type; };

    void operator()(char ch, Num& accum, int base) const {
        // Shift the digits accumulated so far by one position in `base`.
        accum *= base;

        int digit;
        if ('0' <= ch && ch <= '9') {
            digit = ch - '0';
        } else if ('a' <= ch && ch <= 'f') {
            digit = ch - 'a' + 10;
        } else if ('A' <= ch && ch <= 'F') {
            digit = ch - 'A' + 10;
        } else {
            assert(false);
            return;
        }
        accum += digit;
    }
};
// Lazy (Phoenix) wrapper so the functor can be called inside semantic actions.
boost::phoenix::function<accum_f> _accum;
请参阅下文,了解语义操作的相应更改/简化
- 在前缀分支中可以使用内置的 int_parser;这样可能(更快)
警告:当你把 unum 写成不带语义动作的形式时,必须确保不要"捕获"前导的 '0'(用 qi::char_ 匹配就会把它捕获为属性)。否则,你会奇怪为什么任何前缀格式的数字的结果总是 48(即字符 '0' 的 ASCII 码)。
// Top-level rule with no semantic actions. The leading '0' is a plain literal
// and every base letter is wrapped in omit[], so only the digit-run rules
// (hex/bin/oct/dec) contribute a value.
unum = ('0' >>
// prefix forms: '0', a base letter, then the digits
( (omit[ char_("xXhH") ] >> hex)
| (omit[ char_("bByY") ] >> bin)
| (omit[ char_("oOqQ") ] >> oct)
| (omit[ char_("dDtT") ] >> dec))
)
// suffix forms: the digits, then a base letter (optional for decimal)
| (hex >> omit[ char_("xXhH") ])
| (bin >> omit[ char_("bByY") ])
| (oct >> omit[ char_("oOqQ") ])
| (dec >> omit[ -char_("dDtT") ]);
- 可以继续使用 phrase_parse 和跳过器(skipper),只要你使用的解析器表达式本身不使用跳过器(参见 Boost spirit skipper issues)
<strong>Live On Coliru</strong>
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
/// Grammar for an unsigned integer literal with '_' digit separators.
///
/// Supported notations (letters are case-insensitive as listed):
///   - prefix form:  '0' + base letter + digits   (x/h hex, b/y binary,
///                                                  o/q octal, d/t decimal)
///   - suffix form:  digits + base letter          (same letters)
///   - plain digits, parsed as decimal
///
/// Every digit run must contain at least one digit, so inputs such as "",
/// "_" or a lone base letter are rejected instead of being parsed as 0.
/// Inputs that contain digits produce the same values as before.
///
/// @tparam Iterator  input iterator type
/// @tparam Num       numeric attribute type produced by the grammar
template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
    unsigned_parser() : unsigned_parser::base_type(unum) {
        using namespace qi;

        // Digit runs: reset the result, skip leading separators, then require
        // one or more digits, each optionally followed by separators. This
        // accepts the same strings as `*(digit | '_')` except those that
        // contain no digit at all.
        bin = eps[_val=0] >> *lit('_') >> +(char_("01")        [ _accum(_1, _val, 2 )] >> *lit('_'));
        oct = eps[_val=0] >> *lit('_') >> +(char_("0-7")       [ _accum(_1, _val, 8 )] >> *lit('_'));
        dec = eps[_val=0] >> *lit('_') >> +(char_("0-9")       [ _accum(_1, _val, 10)] >> *lit('_'));
        hex = eps[_val=0] >> *lit('_') >> +(char_("0-9a-fA-F") [ _accum(_1, _val, 16)] >> *lit('_'));

        // No semantic actions here: the value is propagated automatically from
        // the digit-run rules. The leading '0' is a plain literal and the base
        // letters are wrapped in omit[] so that they do not become the result.
        unum = ('0' >>
                    ( (omit[ char_("xXhH") ] >> hex)
                    | (omit[ char_("bByY") ] >> bin)
                    | (omit[ char_("oOqQ") ] >> oct)
                    | (omit[ char_("dDtT") ] >> dec))
               )
             | (hex >> omit[  char_("xXhH") ])
             | (bin >> omit[  char_("bByY") ])
             | (oct >> omit[  char_("oOqQ") ])
             | (dec >> omit[ -char_("dDtT") ]);

        BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));
    }
  private:
    qi::rule<Iterator, Num()> unum, hex, oct, dec, bin;

    /// Folds one digit character into the running value:
    /// accum = accum * base + digit(ch).
    struct accum_f {
        template <typename...> struct result { typedef void type; };
        void operator()(char ch, Num& accum, int base) const {
            accum *= base;
            if      (ch >= '0' && ch <= '9') accum += ch - '0';
            else if (ch >= 'a' && ch <= 'f') accum += ch - 'a' + 10;
            else if (ch >= 'A' && ch <= 'F') accum += ch - 'A' + 10;
            else                             assert(false);  // the char sets above never pass anything else
        }
    };
    // Lazy (Phoenix) wrapper so the functor can be called inside semantic actions.
    boost::phoenix::function<accum_f> _accum;
};
/// Grammar for an integer with an optional leading sign: a '-' negates the
/// value parsed by the unsigned sub-grammar, an optional '+' leaves it as is.
///
/// @tparam Iterator  input iterator type
/// @tparam Num       numeric attribute type produced by the grammar
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(snum) {
        using qi::lit;
        using qi::_val;
        using qi::_1;

        // %= keeps automatic attribute propagation while still running the
        // negating action on the '-' branch.
        snum %= (lit('-') >> unum[_val = -_1])
              | (-lit('+') >> unum);

        BOOST_SPIRIT_DEBUG_NODES((snum))
    }
  private:
    qi::rule<Iterator, Num()> snum;
    unsigned_parser<Iterator, Num> unum;  // sub-grammar instance, owned here
};
/// Parses every command-line argument as a signed number and reports, per
/// argument, the parsed value or the failure, plus any unparsed remainder.
int main(int argc, const char *argv[]) {
    typedef std::string::const_iterator iter;
    signed_parser<iter, int64_t> const sp;

    for (int k = 1; k < argc; ++k) {
        std::string const s(argv[k]);
        std::cout << "\n-----------------------------\nParsing '" << s << "':\n";

        int64_t val;
        iter i = s.begin();
        iter const end = s.end();

        // The attribute reference is passed directly; `eoi` requires that the
        // whole input is consumed.
        bool const rv = phrase_parse(i, end, sp >> qi::eoi, ascii::space, val);

        if (!rv) {
            std::cout << "Failed." << std::endl;
        } else {
            std::cout << "Succeeded: " << val << std::endl;
        }

        if (i != end) {
            std::cout << "Remaining unparsed: '" << std::string(i, end) << "'\n";
        }
    }
}
输出:
-----------------------------
Parsing '-124_456d':
Succeeded: -124456
-----------------------------
Parsing '123_456D':
Succeeded: 123456
-----------------------------
Parsing '-123_456T':
Succeeded: -123456
-----------------------------
Parsing '123456t':
Succeeded: 123456
-----------------------------
Parsing '+1_bh':
Succeeded: 27
-----------------------------
Parsing '0_010Q':
Succeeded: 8
-----------------------------
Parsing '+1010_1010_0111_0111_b':
Succeeded: 43639
-----------------------------
Parsing '123_456':
Succeeded: 123456
-----------------------------
Parsing '-123456':
Succeeded: -123456
-----------------------------
Parsing '1_bh':
Succeeded: 27
-----------------------------
Parsing '-0_010Q':
Succeeded: -8
-----------------------------
Parsing '1010_1010_0111_0111_b':
Succeeded: 43639
-----------------------------
Parsing '+0d124_456':
Succeeded: 124456
-----------------------------
Parsing '0D123_456':
Succeeded: 123456
-----------------------------
Parsing '+0T123_456':
Succeeded: 123456
-----------------------------
Parsing '0t123456':
Succeeded: 123456
-----------------------------
Parsing '0h1_b':
Succeeded: 27
-----------------------------
Parsing '0Q0_010':
Succeeded: 8
-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639
-----------------------------
Parsing '06123_45':
Succeeded: 612345
-----------------------------
Parsing '0612345':
Succeeded: 612345
-----------------------------
Parsing '0h1_b':
Succeeded: 27
-----------------------------
Parsing '-0Q0_010':
Succeeded: -8
-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639