我试图复制this example以实现类似C ++的运算符优先级规则(我从一个子集开始,但我最终计划添加其他的)。
尽我所能,我无法解析单个二进制操作的语法。它会解析文字(44,3.42," stackoverflow"),但是3 + 4
会失败。
我确实看了this question和this one,试图让我的解决方案起作用,但得到了相同的结果。
(为了保持简短,我只会在此处发布相关位,完整代码为here)
AST的相关数据结构:
enum class BinaryOperator
{
ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL,
};
typedef boost::variant<double, int, std::string> Litteral;
struct Identifier { std::string name; };
typedef boost::variant<
Litteral,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
struct BinaryOperation
{
Expression rhs, lhs;
BinaryOperator op;
BinaryOperation() {}
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
语法:
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL);
equal %= (lowerGreater >> equalOp >> lowerGreater);
equal.name("equal");
lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL);
lowerGreater %= (shift >> lowerGreaterOp >> shift);
lowerGreater.name("lower or greater");
shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT);
shift %= (addSub >> shiftOp >> addSub);
shift.name("shift");
addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT);
addSub %= (multDivMod >> addSubOp >> multDivMod);
addSub.name("add or sub");
multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO);
multDivMod %= (value >> multDivModOp >> value);
multDivMod.name("mult, div, or mod");
value %= identifier | litteral | ('(' > expression > ')');
value.name("value");
start %= qi::eps >> *(value >> qi::lit(';'));
start.name("start");
expression %= identifier | litteral | equal;
expression.name("expression");
identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
identifier.name("identifier");
litteral %= qi::double_ | qi::int_ | quotedString;
litteral.name("litteral");
quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
quotedString.name("quoted string");
namespace phx = boost::phoenix;
using namespace qi::labels;
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
}
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Litteral()> litteral;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
答案 0 :(得分:4)
主要问题(确实)似乎在second answer you linked to中解决了。
让我谈谈一些问题:
主要问题是复合:
您的开始规则是
start %= qi::eps >> *(value >> qi::lit(';'));
这意味着它需要value
s:
value %= identifier | literal | ('(' > expression > ')');
但是,由于此解析仅标识符和文字或带括号的子表达式,因此永远不会解析3+4
二进制操作。
您的表达式规则,再次允许identifier
或literal
(冗余/混淆):
expression %= identifier | literal | equal;
我认为你想要更像
的东西expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
// and then
start = qi::eps >> -expression % ';';
对于运营商所在的情况,您的BinaryOperation
制作仅允许 ;这打破了规则嵌套运算符优先级的方式:multDivOp
永远不会被接受为匹配,除非碰巧后跟addSubOp
:
addSub %= (multDivMod >> addSubOp >> multDivMod);
multDivMod %= (value >> multDivModOp >> value);
最好修复此问题,如链接答案所示:
addSub = multDivMod >> -(addSubOp >> multDivMod);
multDivMod = value >> -(multDivModOp >> value);
您可以使用语义操作来动态构建AST节点&#34;:
addSub = multDivMod >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
这打败了那些乏味的&#34;声明式方法动手(导致大量回溯,例如Boost spirit poor perfomance with Alternative parser)
文字规则会将整数解析为double,因为它不是严格的:
literal %= qi::double_ | qi::int_ | quotedString;
你可以解决这个问题:
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
FunctionCall
应将functionName
改为Identifier
(不是std::string
)
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
您Expression
operator<<
可能(应该)是boost::static_visitor
,以便您
使用c ++ 11,代码仍然可以在一个函数中:
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
os << "Expression ";
struct v : boost::static_visitor<> {
v(std::ostream& os) : os(os) {}
std::ostream& os;
void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(FunctionCall const& e) const {
os << "(function call: " << e.functionName << "(";
if (e.args.size() > 0) os << e.args.front();
for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
os << ")";
}
};
boost::apply_visitor(v(os), expr);
return os;
}
您可以使用BOOST_SPIRIT_DEBUG_NODES
宏来命名规则
BOOST_SPIRIT_DEBUG_NODES(
(start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
您应该从spirit/include/
目录中加入,然后转发到spirit/home/
或phoenix/include/
,而不是直接包含它们。
这是一个完整的工作示例,它还改进了可读性的语法规则 Live On Coliru :
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
enum class UnaryOperator
{
NOT,
PLUS,
MINUS,
};
std::ostream& operator<<(std::ostream& os, const UnaryOperator op)
{
switch (op)
{
case UnaryOperator::NOT: return os << "!";
case UnaryOperator::PLUS: return os << "+";
case UnaryOperator::MINUS: return os << "-";
}
assert(false);
}
enum class BinaryOperator
{
ADD, SUBTRACT, MULTIPLY, DIVIDE,
MODULO,
LEFT_SHIFT, RIGHT_SHIFT,
EQUAL, NOT_EQUAL,
LOWER, LOWER_EQUAL,
GREATER, GREATER_EQUAL,
};
std::ostream& operator<<(std::ostream& os, const BinaryOperator op)
{
switch (op)
{
case BinaryOperator::ADD: return os << "+";
case BinaryOperator::SUBTRACT: return os << "-";
case BinaryOperator::MULTIPLY: return os << "*";
case BinaryOperator::DIVIDE: return os << "/";
case BinaryOperator::MODULO: return os << "%";
case BinaryOperator::LEFT_SHIFT: return os << "<<";
case BinaryOperator::RIGHT_SHIFT: return os << ">>";
case BinaryOperator::EQUAL: return os << "==";
case BinaryOperator::NOT_EQUAL: return os << "!=";
case BinaryOperator::LOWER: return os << "<";
case BinaryOperator::LOWER_EQUAL: return os << "<=";
case BinaryOperator::GREATER: return os << ">";
case BinaryOperator::GREATER_EQUAL: return os << ">=";
}
assert(false);
}
typedef boost::variant<
double,
int,
std::string
> Literal;
struct Identifier
{
std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name))
struct UnaryOperation;
struct BinaryOperation;
struct FunctionCall;
typedef boost::variant<
Literal,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
struct UnaryOperation
{
Expression rhs;
UnaryOperator op;
};
BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op))
struct BinaryOperation
{
Expression rhs;
BinaryOperator op;
Expression lhs;
BinaryOperation() {}
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs))
struct FunctionCall
{
Identifier functionName;
std::vector<Expression> args;
};
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
struct Program
{
std::vector<Expression> statements;
};
BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements))
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
os << "Expression ";
struct v : boost::static_visitor<> {
v(std::ostream& os) : os(os) {}
std::ostream& os;
void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(FunctionCall const& e) const {
os << "(function call: " << e.functionName << "(";
if (e.args.size() > 0) os << e.args.front();
for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
os << ")";
}
};
boost::apply_visitor(v(os), expr);
return os;
}
std::ostream& operator<<(std::ostream& os, const Program& prog)
{
os << "Program" << std::endl << "{" << std::endl;
for (const Expression& expr : prog.statements)
{
std::cout << "\t" << expr << std::endl;
}
os << "}" << std::endl;
return os;
}
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
using namespace qi::labels;
equalOp.add
("==", BinaryOperator::EQUAL)
("!=", BinaryOperator::NOT_EQUAL);
lowerGreaterOp.add
("<", BinaryOperator::LOWER)
("<=", BinaryOperator::LOWER_EQUAL)
(">", BinaryOperator::GREATER)
(">=", BinaryOperator::GREATER_EQUAL);
shiftOp.add
("<<", BinaryOperator::LEFT_SHIFT)
(">>", BinaryOperator::RIGHT_SHIFT);
addSubOp.add
("+", BinaryOperator::ADD)
("-", BinaryOperator::SUBTRACT);
multDivModOp.add
("*", BinaryOperator::MULTIPLY)
("/", BinaryOperator::DIVIDE)
("%", BinaryOperator::MODULO);
equal = lowerGreater [ _val=_1 ] >> -(equalOp >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
lowerGreater = shift [ _val=_1 ] >> -(lowerGreaterOp >> shift) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
shift = addSub [ _val=_1 ] >> -(shiftOp >> addSub) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
addSub = multDivMod [ _val=_1 ] >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value [ _val=_1 ] >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
addSub = multDivMod >> -(addSubOp >> multDivMod);
multDivMod = value >> -(multDivModOp >> value);
addSub = multDivMod >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
start = qi::eps >> -expression % ';';
expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
identifier = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
}
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, Expression()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Literal()> literal;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
typedef std::string::iterator Iterator;
typedef boost::spirit::ascii::space_type Skipper;
int main()
{
BoltGrammar<Iterator, Skipper> grammar;
std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;");
Program prog;
Iterator iter = str.begin(), last = str.end();
bool r = phrase_parse(iter, last, grammar, ascii::space, prog);
if (r && iter == last)
{
std::cout << "Parsing succeeded: " << prog << std::endl;
}
else
{
std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl;
}
return 0;
}
打印:
Parsing succeeded: Program
{
Expression (literal: 3)
Expression (literal: 4.2)
Expression (literal: lounge <c++>)
Expression (binary op: (Expression (literal: 3) + Expression (literal: 4)))
}