Boost Spirit Lex 与 Qi:集成跳过解析器(skipper)

时间:2013-10-04 15:18:50

标签: c++ parsing boost boost-spirit lexical-analysis

编辑:我已经删除了词法分析器,因为它没有与Qi完全整合,只是混淆了语法(参见here)。


我正在尝试在 Spirit Lex 框架之上开发一个语法。当我尝试把跳过解析器(skipper)移入语法时,开始出现错误。

因此,我在语法结构体中把 qi::grammar<> 和 qi::rule<> event 的签名从 <Iterator> 改成了 <Iterator,void(),ascii::space_type>。除此之外,我还需要做些什么?

此外,我已将 optional 令牌及其他一些令牌的 token_def 设置为省略其属性(lex::omit)。为什么在词法分析器中 optional 的语义动作里,它仍然给出一个有效的 _val?我这样问,是因为我原以为问题出在:Qi 中 event 规则右侧的 optional 令牌带有字符串属性,无法与该规则的 void() 属性签名统一。

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/cstdint.hpp>
#include <string>
#include<exception>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

/// Lexer for the event-description mini language.
///
/// Every data member is one token definition. The constructor assigns each
/// definition its regular expression and registers all of them in the lexer's
/// default state, each with a semantic action that traces the match to
/// std::cout.
///
/// @tparam Lexer  Lexer implementation forwarded to lex::lexer<>; main()
///                instantiates this with lex::lexertl::actor_lexer<>.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // NOTE(review): the braces are wrapped in double quotes inside the
        // pattern, presumably so the regex engine takes them literally rather
        // than as repetition syntax - confirm against the lexertl syntax.
        : left_paranthesis("\"{\"")
        , right_paranthesis("\"}\"")
        , colon(":")
        // Keywords use (?i:...) so that mixed-case spellings such as
        // "OPtiONAL" in the sample input are accepted.
        , namespace_("(?i:namespace)")
        , event("(?i:event)")
        , optional("(?i:optional)")
        , required("(?i:required)")
        , ordinal("\\d+")
        , identifier("\\w+")
    {
        using boost::spirit::lex::_val;

        // A single blank is registered as a token of its own, followed by the
        // named tokens. Keywords and `ordinal` are listed before `identifier`,
        // whose \w+ pattern also matches all of them.
        //
        // NOTE(review): `optional` is declared with lex::omit, yet its action
        // streams _val. Presumably lex::omit only suppresses the attribute
        // exposed to a Qi parser, while _val inside a lexer action still
        // refers to the matched input - confirm against the Spirit.Lex docs.
        this->self
            = " "
            | left_paranthesis    [ std::cout << px::val("lpar") << std::endl]
            | right_paranthesis   [ std::cout << px::val("rpar") << std::endl]
            | colon               [ std::cout << px::val("colon") << std::endl]
            | namespace_          [ std::cout << px::val("kw namespace") << std::endl]
            | event               [ std::cout << px::val("kw event") << std::endl]
            | optional            [ std::cout << px::val("optional ")  << "-->" << _val << "<--" << std::endl]
            | required            [ std::cout << px::val("required") << std::endl]
            | ordinal             [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier          [ std::cout << px::val("val identifier(") << _val << ")" << std::endl];
    }

    // Punctuation; default token attribute.
    lex::token_def<> left_paranthesis, right_paranthesis, colon;
    // Keywords; declared with lex::omit as their attribute type.
    lex::token_def<lex::omit> namespace_, event, optional, required;
    // Decimal number exposed as a 32-bit unsigned integer.
    lex::token_def<boost::uint32_t> ordinal;
    // Identifier; default token attribute.
    lex::token_def<> identifier;
};

/// Qi grammar over the lexer's token stream, declared without a skipper.
///
/// @tparam Iterator  Iterator type the grammar parses; main() passes the
///                   lexer's iterator_type.
template <typename Iterator>
struct grammar : qi::grammar<Iterator>
{
    /// Defines the entry rule from the token definitions in @p tok.
    ///
    /// @param tok  Object whose `optional` member can be used as a Qi parser.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
        : qi::grammar<Iterator>(event)
    {
        using px::val;

        // `event` is the entry rule; `start` is left undefined
        // (the `start = event;` wiring is disabled).
        event = tok.optional[std::cout << val("== OPTIONAL") << std::endl];
    }

    qi::rule<Iterator> start;
    qi::rule<Iterator> event;
};

// Fuller sample input, currently disabled:
// std::string test = "namespace{ event { OPtiONAL 124:hello_world RequireD} } ";

// Input that main() parses. Left non-const because main() takes mutable
// std::string::iterator values from it.
std::string test("OPTIONAL");

int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type> grammar(token_lexer);

    std::string::iterator first = test.begin();
    std::string::iterator last = test.end(); 
    bool r; 

    r = lex::tokenize_and_parse(first, last, token_lexer, grammar);

    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
   /* 
    lexer_type::iterator_type iter; 

    try
    {
        iter = token_lexer.begin(first,last);
    }
    catch(std::exception & e)
    {
        std::cout << e.what() << std::endl;
    }

    lexer_type::iterator_type end = token_lexer.end();

    while (iter != end && token_is_valid(*iter))
        ++iter;
   */ 
}

此语法失败:

// Variant of the grammar that the question reports as failing: the grammar and
// its entry rule are given the signature <Iterator, void(), ascii::space_type>.
//
// NOTE(review): ascii::space_type is a character-level skipper, whereas main()
// instantiates Iterator with the lexer's token iterator and calls
// lex::tokenize_and_parse without supplying a skipper; that mismatch is the
// likely cause of the failure. The working version further down this page
// uses qi::in_state_skipper<Lexer> instead - confirm.
template <typename Iterator>
struct grammar : qi::grammar<Iterator,void(),ascii::space_type>
{
    // Defines the entry rule `event` from the `optional` token definition of
    // `tok`; the attached action prints a trace line to std::cout.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
      : grammar::base_type(event)
    {
      // `start` is left undefined; the wiring below is disabled.
      //start = event;
      event = tok.optional [ std::cout << px::val("== OPTIONAL") << std::endl];
    }

    // Declared without a skipper; never assigned or used in this block.
    qi::rule<Iterator> start;
    // Entry rule; carries the same signature as the grammar itself.
    qi::rule<Iterator,void(),ascii::space_type> event;
};

1 个答案:

答案 0 :(得分:2)

与 Spirit 的大多数情况一样:如果你想做一些实际的事情,就必须花上好几个小时去寻找一个文档中没有记载、而是埋在示例和邮件列表里的解决方案。请认真考虑改用 ragel 或 flex/bison。问题不在于这套机制不存在,而在于它没有文档。

在这种情况下,查看 lex 文档时很容易被严重误导:lex 的解析器 API 中有一个 tokenize_and_phrase_parse 函数,但当你试图像使用 qi::phrase_parse 那样使用它时,它并不能真正奏效;文档也没有解释如何通过这个函数接入一个跳过解析器(skipper)。

要把空白跳过解析器(skipper)接入解析器,需要先修改词法分析器,然后用一种文档中未记载的 qi skipper 构造来初始化语法和规则。您可以在 lex 示例目录中看到这种做法(示例 5)。下面是可以编译并运行的代码:

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/cstdint.hpp>
#include <string>
#include<exception>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

/// Lexer for the event-description mini language, with whitespace kept in a
/// separate lexer state so that a Qi skipper can consume it.
///
/// Every data member is one token definition. The constructor assigns each
/// definition its regular expression, registers them in the lexer's default
/// state with a tracing semantic action, and registers a whitespace token in
/// the "WS" state.
///
/// @tparam Lexer  Lexer implementation forwarded to lex::lexer<>; main()
///                instantiates this with lex::lexertl::actor_lexer<>.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // NOTE(review): the braces are wrapped in double quotes inside the
        // pattern, presumably so the regex engine takes them literally rather
        // than as repetition syntax - confirm against the lexertl syntax.
        : left_paranthesis("\"{\"")
        , right_paranthesis("\"}\"")
        , colon(":")
        // Keywords use (?i:...) so that mixed-case spellings such as
        // "OPtiONAL" in the sample input are accepted.
        , namespace_("(?i:namespace)")
        , event("(?i:event)")
        , optional("(?i:optional)")
        , required("(?i:required)")
        , ordinal("\\d+")
        , identifier("\\w+")
    {
        using boost::spirit::lex::_val;

        // Tokens of the default state. Keywords and `ordinal` are listed
        // before `identifier`, whose \w+ pattern also matches all of them.
        //
        // NOTE(review): `optional` is declared with lex::omit, yet its action
        // streams _val. Presumably lex::omit only suppresses the attribute
        // exposed to a Qi parser, while _val inside a lexer action still
        // refers to the matched input - confirm against the Spirit.Lex docs.
        this->self
            =
              left_paranthesis    [ std::cout << px::val("lpar") << std::endl]
            | right_paranthesis   [ std::cout << px::val("rpar") << std::endl]
            | colon               [ std::cout << px::val("colon") << std::endl]
            | namespace_          [ std::cout << px::val("kw namespace") << std::endl]
            | event               [ std::cout << px::val("kw event") << std::endl]
            | optional            [ std::cout << px::val("optional ")  << "-->" << _val << "<--" << std::endl]
            | required            [ std::cout << px::val("required") << std::endl]
            | ordinal             [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier          [ std::cout << px::val("val identifier(") << _val << ")" << std::endl];

        // Whitespace lives in its own "WS" state; main() selects that state
        // for the skipper via qi::in_state("WS"). '\r' is included so that
        // input with CRLF line endings is skipped as well.
        this->self("WS") = lex::token_def<>("[ \\t\\n\\r]+");
    }

    // Punctuation; default token attribute.
    lex::token_def<> left_paranthesis, right_paranthesis, colon;
    // Keywords; declared with lex::omit as their attribute type.
    lex::token_def<lex::omit> namespace_, event, optional, required;
    // Decimal number exposed as a 32-bit unsigned integer.
    lex::token_def<boost::uint32_t> ordinal;
    // Identifier; default token attribute.
    lex::token_def<> identifier;
};

/// Qi grammar over the lexer's token stream that skips with
/// qi::in_state_skipper<Lexer>.
///
/// @tparam Iterator  Iterator type the grammar parses; main() passes the
///                   lexer's iterator_type.
/// @tparam Lexer     Type handed to qi::in_state_skipper<>; main() passes the
///                   lexer's lexer_def.
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    /// Defines the entry rule from the token definitions in @p tok.
    ///
    /// @param tok  Object whose `optional` member can be used as a Qi parser.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
        : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >(event)
    {
        using px::val;

        // `event` is the entry rule; `start` is left undefined
        // (the `start = event;` wiring is disabled).
        event = tok.optional[std::cout << val("== OPTIONAL") << std::endl];
    }

    qi::rule<Iterator> start;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > event;
};

// Fuller sample input, currently disabled:
// std::string test = "namespace{ event { OPtiONAL 124:hello_world RequireD} } ";

// Input that main() parses; the surrounding blanks exercise the whitespace
// skipper. Left non-const because main() takes a mutable
// std::string::iterator from it.
std::string test(" OPTIONAL ");

int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);

    std::string::iterator it = test.begin();
    iterator_type first = token_lexer.begin(it, test.end());
    iterator_type last = token_lexer.end();

    bool r; 

    r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]);

    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
   /* 
    lexer_type::iterator_type iter; 

    try
    {
        iter = token_lexer.begin(first,last);
    }
    catch(std::exception & e)
    {
        std::cout << e.what() << std::endl;
    }

    lexer_type::iterator_type end = token_lexer.end();

    while (iter != end && token_is_valid(*iter))
        ++iter;
   */ 
}