用boost精神实现运算符优先级

时间:2014-06-14 12:35:48

标签: c++ parsing boost

我试图复制this example以实现类似C ++的运算符优先级规则(我从一个子集开始,但我最终计划添加其他的)。

尽我所能,我无法解析单个二进制操作的语法。它会解析文字(44,3.42," stackoverflow"),但是3 + 4会失败。

我确实看了this questionthis one,试图让我的解决方案起作用,但得到了相同的结果。

(为了保持简短,我只会在此处发布相关位,完整代码为here

AST的相关数据结构:

enum class BinaryOperator
{
    ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL,
};

typedef boost::variant<double, int, std::string> Litteral;

struct Identifier { std::string name; };

typedef boost::variant<
    Litteral, 
    Identifier,
    boost::recursive_wrapper<UnaryOperation>,
    boost::recursive_wrapper<BinaryOperation>,
    boost::recursive_wrapper<FunctionCall>
> Expression;

struct BinaryOperation
{
    Expression rhs, lhs;
    BinaryOperator op;

    BinaryOperation() {}
    BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};

语法:

template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
    BoltGrammar() : BoltGrammar::base_type(start, "start")
    {
        equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL);
        equal %= (lowerGreater >> equalOp >> lowerGreater);
        equal.name("equal");

        lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL);
        lowerGreater %= (shift >> lowerGreaterOp >> shift);
        lowerGreater.name("lower or greater");

        shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT);
        shift %= (addSub >> shiftOp >> addSub);
        shift.name("shift");

        addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT);
        addSub %= (multDivMod >> addSubOp >> multDivMod);
        addSub.name("add or sub");

        multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO);
        multDivMod %= (value >> multDivModOp >> value);
        multDivMod.name("mult, div, or mod");

        value %= identifier | litteral | ('(' > expression > ')');
        value.name("value");

        start %= qi::eps >> *(value >> qi::lit(';'));
        start.name("start");

        expression %= identifier | litteral | equal;
        expression.name("expression");

        identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
        identifier.name("identifier");

        litteral %= qi::double_ | qi::int_ | quotedString;
        litteral.name("litteral");

        quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
        quotedString.name("quoted string");

        namespace phx = boost::phoenix;
        using namespace qi::labels;
        qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
}

qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;

qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Litteral()> litteral;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};

1 个答案:

答案 0 :(得分:4)

主要问题(确实)似乎在second answer you linked to中解决了。

让我谈谈一些问题:

  1. 主要问题是复合:

    • 您的开始规则是

      start     %= qi::eps >> *(value >> qi::lit(';'));
      

      这意味着它需要value s:

      value     %= identifier | literal | ('(' > expression > ')');
      

      但是,由于此解析标识符和文字或带括号的子表达式,因此永远不会解析3+4二进制操作。

    • 您的表达式规则,再次允许identifierliteral(冗余/混淆):

      expression %= identifier | literal | equal;
      

    我认为你想要更像

    的东西
    expression   = '(' >> expression >> ')' | equal | value;
    value        = identifier | literal;
    
    // and then
    start        = qi::eps >> -expression % ';';
    
  2. 对于运营商所在的情况,您的BinaryOperation制作仅允许 ;这打破了规则嵌套运算符优先级的方式:multDivOp永远不会被接受为匹配,除非碰巧后跟addSubOp

    addSub %= (multDivMod >> addSubOp >> multDivMod);
    multDivMod %= (value >> multDivModOp >> value);
    

    最好修复此问题,如链接答案所示:

    addSub     = multDivMod >> -(addSubOp     >> multDivMod);
    multDivMod = value      >> -(multDivModOp >> value);
    

    您可以使用语义操作来动态构建AST节点&#34;:

    addSub     = multDivMod >> -(addSubOp     >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
    multDivMod = value      >> -(multDivModOp >> value)      [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
    

    这打败了那些乏味的&#34;声明式方法动手(导致大量回溯,例如Boost spirit poor perfomance with Alternative parser

  3. 文字规则会将整数解析为double,因为它不是严格的:

    literal   %= qi::double_ | qi::int_ | quotedString;
    

    你可以解决这个问题:

    qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
    
    literal      = quotedString | strict_double | qi::int_;
    
  4. FunctionCall应将functionName改为Identifier(不是std::string

    BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
    
  5. Expression operator<<可能(应该)是boost::static_visitor,以便您

    • 消除魔术类型开关号
    • 让编译器检查交换机的完整性
    • 可以利用重载决策来启用变体成员类型

    使用c ++ 11,代码仍然可以在一个函数中:

    std::ostream& operator<<(std::ostream& os, const Expression& expr)
    {
        os << "Expression ";
        struct v : boost::static_visitor<> {
            v(std::ostream& os) : os(os) {}
            std::ostream& os;
    
            void operator()(Literal         const& e) const { os << "(literal: "    << e                           << ")"; }
            void operator()(Identifier      const& e) const { os << "(identifier: " << e.name                      << ")"; }
            void operator()(UnaryOperation  const& e) const { os << "(unary op: "   << boost::fusion::as_vector(e) << ")"; }
            void operator()(BinaryOperation const& e) const { os << "(binary op: "  << boost::fusion::as_vector(e) << ")"; }
            void operator()(FunctionCall    const& e) const {
                os << "(function call: " << e.functionName << "("; 
                if (e.args.size() > 0) os << e.args.front();
                for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
                os << ")";
            }
        };
        boost::apply_visitor(v(os), expr);
        return os;
    }
    
  6. 您可以使用BOOST_SPIRIT_DEBUG_NODES宏来命名规则

    BOOST_SPIRIT_DEBUG_NODES(
        (start)(expression)(identifier)(literal)(quotedString)
        (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
    )
    
  7. 您应该从spirit/include/目录中加入,然后转发到spirit/home/phoenix/include/,而不是直接包含它们。

  8. 这是一个完整的工作示例,它还改进了可读性的语法规则 Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #define BOOST_SPIRIT_USE_PHOENIX_V3
    
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/fusion/include/io.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/variant.hpp>
    #include <iostream>
    #include <string>
    #include <vector>
    
    namespace qi    = boost::spirit::qi;
    namespace phx   = boost::phoenix;
    namespace ascii = boost::spirit::ascii;
    
    enum class UnaryOperator
    {
        NOT,
        PLUS,
        MINUS,
    };
    
    std::ostream& operator<<(std::ostream& os, const UnaryOperator op)
    {
        switch (op)
        {
            case UnaryOperator::NOT:   return os << "!";
            case UnaryOperator::PLUS:  return os << "+";
            case UnaryOperator::MINUS: return os << "-";
        }
    
        assert(false);
    }
    
    enum class BinaryOperator
    {
        ADD,        SUBTRACT,      MULTIPLY, DIVIDE,
        MODULO,
        LEFT_SHIFT, RIGHT_SHIFT,
        EQUAL,      NOT_EQUAL,
        LOWER,      LOWER_EQUAL,
        GREATER,    GREATER_EQUAL,
    };
    
    std::ostream& operator<<(std::ostream& os, const BinaryOperator op)
    {
        switch (op)
        {
            case BinaryOperator::ADD:           return os << "+";
            case BinaryOperator::SUBTRACT:      return os << "-";
            case BinaryOperator::MULTIPLY:      return os << "*";
            case BinaryOperator::DIVIDE:        return os << "/";
            case BinaryOperator::MODULO:        return os << "%";
            case BinaryOperator::LEFT_SHIFT:    return os << "<<";
            case BinaryOperator::RIGHT_SHIFT:   return os << ">>";
            case BinaryOperator::EQUAL:         return os << "==";
            case BinaryOperator::NOT_EQUAL:     return os << "!=";
            case BinaryOperator::LOWER:         return os << "<";
            case BinaryOperator::LOWER_EQUAL:   return os << "<=";
            case BinaryOperator::GREATER:       return os << ">";
            case BinaryOperator::GREATER_EQUAL: return os << ">=";
        }
    
        assert(false);
    }
    
    typedef boost::variant<
        double, 
        int, 
        std::string
    > Literal;
    
    struct Identifier
    {
        std::string name;
    };
    BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name))
    
    struct UnaryOperation;
    struct BinaryOperation;
    struct FunctionCall;
    
    typedef boost::variant<
        Literal, 
        Identifier,
        boost::recursive_wrapper<UnaryOperation>,
        boost::recursive_wrapper<BinaryOperation>,
        boost::recursive_wrapper<FunctionCall>
    > Expression;
    
    struct UnaryOperation
    {
        Expression rhs;
        UnaryOperator op;
    };
    BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op))
    
    struct BinaryOperation
    {
        Expression rhs;
        BinaryOperator op;
        Expression lhs;
    
        BinaryOperation() {}
        BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
    };
    BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs))
    
    struct FunctionCall
    {
        Identifier functionName;
        std::vector<Expression> args;
    };
    BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
    
    struct Program
    {
        std::vector<Expression> statements;
    };
    BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements))
    
    std::ostream& operator<<(std::ostream& os, const Expression& expr)
    {
        os << "Expression ";
        struct v : boost::static_visitor<> {
            v(std::ostream& os) : os(os) {}
            std::ostream& os;
    
            void operator()(Literal         const& e) const { os << "(literal: "    << e                           << ")"; }
            void operator()(Identifier      const& e) const { os << "(identifier: " << e.name                      << ")"; }
            void operator()(UnaryOperation  const& e) const { os << "(unary op: "   << boost::fusion::as_vector(e) << ")"; }
            void operator()(BinaryOperation const& e) const { os << "(binary op: "  << boost::fusion::as_vector(e) << ")"; }
            void operator()(FunctionCall    const& e) const {
                os << "(function call: " << e.functionName << "("; 
                if (e.args.size() > 0) os << e.args.front();
                for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
                os << ")";
            }
        };
        boost::apply_visitor(v(os), expr);
        return os;
    }
    
    std::ostream& operator<<(std::ostream& os, const Program& prog)
    {
        os << "Program" << std::endl << "{" << std::endl;
        for (const Expression& expr : prog.statements) 
        { 
            std::cout << "\t" << expr << std::endl; 
        }
        os << "}" << std::endl;
    
        return os;
    }
    
    template<typename Iterator, typename Skipper>
    struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
    {
        BoltGrammar() : BoltGrammar::base_type(start, "start")
        {
            using namespace qi::labels;
    
            equalOp.add
                ("==", BinaryOperator::EQUAL)
                ("!=", BinaryOperator::NOT_EQUAL);
            lowerGreaterOp.add
                ("<", BinaryOperator::LOWER)
                ("<=", BinaryOperator::LOWER_EQUAL)
                (">", BinaryOperator::GREATER)
                (">=", BinaryOperator::GREATER_EQUAL);
            shiftOp.add
                ("<<", BinaryOperator::LEFT_SHIFT)
                (">>", BinaryOperator::RIGHT_SHIFT);
            addSubOp.add
                ("+", BinaryOperator::ADD)
                ("-", BinaryOperator::SUBTRACT);
            multDivModOp.add
                ("*", BinaryOperator::MULTIPLY)
                ("/", BinaryOperator::DIVIDE)
                ("%", BinaryOperator::MODULO);
    
            equal        = lowerGreater [ _val=_1 ] >> -(equalOp        >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
            lowerGreater = shift        [ _val=_1 ] >> -(lowerGreaterOp >> shift)        [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
            shift        = addSub       [ _val=_1 ] >> -(shiftOp        >> addSub)       [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
            addSub       = multDivMod   [ _val=_1 ] >> -(addSubOp       >> multDivMod)   [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
            multDivMod   = value        [ _val=_1 ] >> -(multDivModOp   >> value)        [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
    
            addSub     = multDivMod >> -(addSubOp     >> multDivMod);
            multDivMod = value      >> -(multDivModOp >> value);
    
            addSub     = multDivMod >> -(addSubOp     >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
            multDivMod = value      >> -(multDivModOp >> value)      [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
    
            start        = qi::eps >> -expression % ';';
            expression   = '(' >> expression >> ')' | equal | value;
            value        = identifier | literal;
            identifier   = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
    
            qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
    
            literal      = quotedString | strict_double | qi::int_;
            quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
    
            qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
    
            BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString)
                    (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
                    )
        }
    
        qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
        qi::rule<Iterator, Skipper, Expression()>  equal,  lowerGreater,  shift,  addSub,  multDivMod;
        qi::rule<Iterator, Skipper, Expression()>  value;
    
        qi::rule<Iterator, Skipper, Program()>     start;
        qi::rule<Iterator, Skipper, Expression()>  expression;
        qi::rule<Iterator, Skipper, Identifier()>  identifier;
        qi::rule<Iterator, Skipper, Literal()>     literal;
        qi::rule<Iterator, Skipper, std::string()> quotedString;
    };
    
    typedef std::string::iterator Iterator;
    typedef boost::spirit::ascii::space_type Skipper;
    
    int main()
    {
        BoltGrammar<Iterator, Skipper> grammar;
    
        std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;");
    
        Program prog;
        Iterator iter = str.begin(), last = str.end();
        bool r = phrase_parse(iter, last, grammar, ascii::space, prog);
    
        if (r && iter == last)
        {
            std::cout << "Parsing succeeded: " << prog << std::endl;
        }
        else
        {
            std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl;
        }
    
        return 0;
    }
    

    打印:

    Parsing succeeded: Program
    {
        Expression (literal: 3)
        Expression (literal: 4.2)
        Expression (literal: lounge <c++>)
        Expression (binary op: (Expression (literal: 3) + Expression (literal: 4)))
    }