Boost Spirit X3:折叠只含一个元素的列表

时间:2018-12-12 22:50:41

标签: c++ parsing boost ebnf boost-spirit-x3

假设我有一个(简化的)递归语法,如下所示:

OrExpr    := AndExpr % "or"
AndExpr   := Term % "and"
Term      := ParenExpr | String
ParenExpr := '(' >> OrExpr >> ')'
String    := lexeme['"' >> *(char_ - '"') >> '"']

这样是可行的,但问题在于它会把所有内容包装在多层表达式中。例如,字符串 "hello" and ("world" or "planet" or "globe") 会被解析为 OrExpr(AndExpr("hello", OrExpr(AndExpr("world"), AndExpr("planet"), AndExpr("globe"))))(这里的写法并不严谨,但希望您能明白意思)。我想要的是把只含一个元素的节点折叠进其父节点,使最终结果变成 AndExpr("hello", OrExpr("world", "planet", "globe"))。

这可以借助语义动作(semantic action)加上一个状态机来解决:仅当内部对象多于一个时才构造外层对象。但我想知道,是否有一种不使用解析器语义动作的方法来解决这个问题?


编辑:一个接近最小化的示例

Coliru

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>

namespace x3 = boost::spirit::x3;

namespace burningmime::setmatch::ast
{
    // Forward declaration: Node (below) can hold an Expr, and Expr holds Nodes.
    struct Expr;

    // One operand of an expression: either a terminal string or a nested Expr.
    struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
    {
        using base_type::base_type;
        using base_type::operator=;
    };

    // Operator tag stored in every Expr.
    enum OPER
    {
        OPER_AND = 1,
        OPER_OR = 2
    };

    // An operator together with its ordered list of operands.
    struct Expr
    {
        OPER op;
        std::vector<Node> children;
    };

    // Debugging visitor: writes a node to std::cout as And(...)/Or(...) with
    // comma-separated operands; terminal strings are written verbatim.
    struct AstPrinter
    {
        // Terminal: print the stored text as-is.
        void operator()(const std::string& text) const
        {
            std::cout << text;
        }

        // Expression: print the operator name, then each operand recursively.
        void operator()(const Expr& expr) const
        {
            std::cout << (expr.op == OPER_AND ? "And(" : "Or(");

            // Empty before the first operand, ", " before every later one.
            const char* separator = "";
            for(const Node& operand : expr.children)
            {
                std::cout << separator;
                separator = ", ";
                boost::apply_visitor(*this, operand);
            }

            std::cout << ")";
        }
    };
}

 // Adapt ast::Expr as a Boost.Fusion sequence over its members (op, children).
 // This must appear at top-level (global) scope; the adaptation adds compile-time
 // type information so the parser knows where to put the various attributes.
BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)

// DECLARE_RULE(NAME, TYPE): declares a static x3::rule object called NAME whose
// tag type is `class NAME`, whose attribute type is TYPE, and which is
// initialized from the stringized NAME.
#define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME, TYPE> NAME = #NAME;
// KEYWORD(X): defines kw_X as x3::no_case applied to the stringized token X.
#define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
// DEFINE_RULE(NAME, GRAMMAR): stores GRAMMAR in NAME_def and passes NAME to
// BOOST_SPIRIT_DEFINE.
#define DEFINE_RULE(NAME, GRAMMAR) \
    static const auto NAME##_def = GRAMMAR; \
    BOOST_SPIRIT_DEFINE(NAME)

namespace burningmime::setmatch::parser
{
    // we need to pre-declare the rules so they can be used recursively
    DECLARE_RULE(Phrase, std::string)
    DECLARE_RULE(Term, ast::Node)
    DECLARE_RULE(AndExpr, ast::Expr)
    DECLARE_RULE(OrExpr, ast::Expr)
    DECLARE_RULE(ParenExpr, ast::Expr)

    // keywords
    KEYWORD(and)
    KEYWORD(or)

    static const auto lparen = x3::lit('(');
    static const auto rparen = x3::lit(')');

    // helper parsers
    static const auto keywords = kw_and | kw_or | lparen | rparen;
    static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
    static const auto bareWord = word - keywords;
    static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];

    DEFINE_RULE(Phrase,     quotedString | bareWord)
    DEFINE_RULE(Term,       ParenExpr | Phrase)
    DEFINE_RULE(ParenExpr,  lparen >> OrExpr >> rparen)
    DEFINE_RULE(AndExpr,    x3::attr(ast::OPER_AND) >> (Term % kw_and))
    DEFINE_RULE(OrExpr,     x3::attr(ast::OPER_OR) >> (AndExpr % kw_or))
}

namespace burningmime::setmatch
{
    void parseRuleFluent(const char* buf)
    {
        ast::Expr root;
        auto start = buf, end = start + strlen(buf);
        bool success = x3::phrase_parse(start, end, parser::OrExpr, x3::ascii::space, root);
        if(!success || start != end)
            throw std::runtime_error(std::string("Could not parse rule: ") + buf);
        printf("Result of parsing: %s\n=========================\n", start);
        ast::Node root2(root);
        boost::apply_visitor(ast::AstPrinter(), root2);
    }
}

// Entry point: parses and prints one sample rule.
int main()
{
    const char* sampleRule = R"#("hello" and ("world" or "planet" or "globe"))#";
    burningmime::setmatch::parseRuleFluent(sampleRule);
}

1 个答案:

答案 0 :(得分:1)

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>

namespace x3 = boost::spirit::x3;

namespace burningmime::setmatch::ast
{
    // Forward declaration: Node (below) can hold an Expr, and Expr holds Nodes.
    struct Expr;

    // One operand of an expression: either a terminal string or a nested Expr.
    struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
    {
        using base_type::base_type;
        using base_type::operator=;
    };

    // Operator tag stored in every Expr.
    enum OPER
    {
        OPER_AND = 1,
        OPER_OR = 2
    };

    // An operator together with its ordered list of operands.
    struct Expr
    {
        OPER op;
        std::vector<Node> children;
    };

    // Debugging visitor: writes a node to std::cout as And(...)/Or(...) with
    // comma-separated operands; terminal strings are written verbatim.
    struct AstPrinter
    {
        // Terminal: print the stored text as-is.
        void operator()(const std::string& text) const
        {
            std::cout << text;
        }

        // Expression: print the operator name, then each operand recursively.
        void operator()(const Expr& expr) const
        {
            if(expr.op == OPER_AND)
                std::cout << "And(";
            else
                std::cout << "Or(";

            const auto firstOperand = expr.children.begin();
            const auto pastLast = expr.children.end();
            for(auto operand = firstOperand; operand != pastLast; ++operand)
            {
                // Separator goes before every operand except the first.
                if(operand != firstOperand) std::cout << ", ";
                boost::apply_visitor(*this, *operand);
            }

            std::cout << ")";
        }
    };
}

 // Adapt ast::Expr as a Boost.Fusion sequence over its members (op, children).
 // This must appear at top-level (global) scope; the adaptation adds compile-time
 // type information so the parser knows where to put the various attributes.
BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)

// DECLARE_RULE(NAME, TYPE): declares a static x3::rule object called NAME whose
// tag type is `class NAME_r` (NAME with an `_r` suffix), whose attribute type is
// TYPE, and which is initialized from the stringized NAME.
#define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME##_r, TYPE> NAME = #NAME;
// KEYWORD(X): defines kw_X as x3::no_case applied to the stringized token X.
#define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
// DEFINE_RULE(NAME, GRAMMAR): stores GRAMMAR in NAME_def and passes NAME to
// BOOST_SPIRIT_DEFINE.
#define DEFINE_RULE(NAME, GRAMMAR) \
    static const auto NAME##_def = GRAMMAR; \
    BOOST_SPIRIT_DEFINE(NAME)

namespace burningmime::setmatch::parser
{
    // we need to pre-declare the rules so they can be used recursively
    DECLARE_RULE(Phrase,    std::string)
    DECLARE_RULE(Term,      ast::Node)
    DECLARE_RULE(AndExpr,   ast::Node)
    DECLARE_RULE(OrExpr,    ast::Node)
    DECLARE_RULE(ParenExpr, ast::Node)

    // keywords
    KEYWORD(and)
    KEYWORD(or)

    static const auto lparen = x3::lit('(');
    static const auto rparen = x3::lit(')');

    // helper parsers
    static const auto keywords = kw_and | kw_or | lparen | rparen;
    static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
    static const auto bareWord = word - keywords;
    static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];

    DEFINE_RULE(Phrase,     quotedString | bareWord)
    DEFINE_RULE(Term,       ParenExpr | Phrase)
    DEFINE_RULE(ParenExpr,  lparen >> OrExpr >> rparen)
    template <ast::OPER Op>
    struct make_node
    {
        template <typename Context >
        void operator()(Context const& ctx) const
        {
            if (_attr(ctx).size() == 1)
                _val(ctx) = std::move(_attr(ctx)[0]);
            else
                _val(ctx) = ast::Expr{ Op, std::move(_attr(ctx)) };
        }
    };
    DEFINE_RULE(AndExpr,    (Term % kw_and)[make_node<ast::OPER_AND>{}])
    DEFINE_RULE(OrExpr,     (AndExpr % kw_or)[make_node<ast::OPER_OR>{}])
}

namespace burningmime::setmatch
{
    // Parses the NUL-terminated rule text in `buf` with parser::OrExpr (skipping
    // ASCII whitespace) and prints the resulting AST to standard output.
    // Throws std::runtime_error when parsing fails or stops before the end of input.
    void parseRuleFluent(const char* buf)
    {
        const char* cursor = buf;
        const char* inputEnd = buf + strlen(buf);

        ast::Node root;
        const bool parsed = x3::phrase_parse(cursor, inputEnd, parser::OrExpr, x3::ascii::space, root);
        const bool consumedAll = (cursor == inputEnd);
        if (!(parsed && consumedAll))
            throw std::runtime_error(std::string("Could not parse rule: ") + buf);

        // `cursor` now points at the unparsed remainder of the input.
        printf("Result of parsing: %s\n=========================\n", cursor);
        boost::apply_visitor(ast::AstPrinter(), root);
    }
}

// Entry point: parses and prints one sample rule.
int main()
{
    const char* sampleRule = R"#("hello" and ("world" or "planet" or "globe"))#";
    burningmime::setmatch::parseRuleFluent(sampleRule);
}

https://wandbox.org/permlink/kMSHOHG0pgwGr0zv

输出:

Result of parsing: 
=========================
And("hello", Or("world", "planet", "globe"))