使用Boost.Spirit编译一个简单的解析器

时间:2012-02-22 22:57:12

标签: c++ boost-spirit

作为我正在编写的一个简单骨架(skeleton)工具的一部分,我有一个用于触发文本替换的语法。我本以为这是熟悉 Boost.Spirit 的好方法,但它的模板错误信息实在是一种"独特的享受"。

以下是完整的代码:

#include <iostream>
#include <iterator>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace bsq = boost::spirit::qi;

namespace {
/// Grammar for skeleton text: plain text interleaved with `<<identifier>>`
/// macros.
///
/// Every rule deliberately uses the same signature (no skipper, no
/// attribute). A Qi rule that declares a skipper cannot be invoked from a
/// rule that has none, so mixing the two does not compile; whitespace that
/// should be tolerated is therefore matched explicitly inside `macro`.
template<typename Iterator>
struct skel_grammar : public bsq::grammar<Iterator> {
    skel_grammar();

private:
    bsq::rule<Iterator> macro_b;  ///< opening delimiter "<<"
    bsq::rule<Iterator> macro_e;  ///< closing delimiter ">>"
    bsq::rule<Iterator> id;       ///< macro name
    bsq::rule<Iterator> macro;    ///< macro_b, optional blanks, id, optional blanks, macro_e
    bsq::rule<Iterator> text;     ///< run of characters up to the next "<<"
    bsq::rule<Iterator> start;    ///< any sequence of text runs and macros
};

/// Wires up the rules; `start` is registered as the grammar's entry point.
template<typename Iterator>
skel_grammar<Iterator>::skel_grammar() : skel_grammar::base_type(start)
{
    macro_b = bsq::lit("<<");
    macro_e = bsq::lit(">>");

    // The rules carry no attribute, so there is nothing to accumulate into:
    // no semantic action is attached, the rule only recognises the text.
    text = +(bsq::char_ - macro_b);

    // One leading letter/underscore followed by any number of
    // alphanumerics/underscores. A leading optional followed by a mandatory
    // `+` would reject one-character names, because the optional greedily
    // consumes the only character and nothing is left for the `+`.
    id = (bsq::ascii::alpha | bsq::char_('_'))
        >> *(bsq::ascii::alnum | bsq::char_('_'));

    // Blanks around the identifier are consumed explicitly since no skipper
    // is in effect anywhere in this grammar.
    macro = macro_b
        >> *bsq::ascii::space >> id >> *bsq::ascii::space
        >> macro_e;

    start = *(text | macro);
}
}  // namespace

/// Reads all of standard input, runs it through skel_grammar and prints
/// `true` only when the grammar matched the entire input, `false` otherwise.
int main(int argc, char* argv[])
{
    std::string input((std::istreambuf_iterator<char>(std::cin)),
                      std::istreambuf_iterator<char>());
    skel_grammar<std::string::iterator> grammar;

    // The start rule is a Kleene star, so parse() by itself succeeds on any
    // input (possibly consuming nothing). Keep the iterators in named
    // variables so we can tell whether everything was actually consumed.
    std::string::iterator first = input.begin();
    const std::string::iterator last = input.end();
    const bool matched = bsq::parse(first, last, grammar);
    const bool r = matched && first == last;

    std::cout << std::boolalpha << r << '\n';
    return 0;
}

这段代码出了什么问题?

1 个答案:

答案 0 :(得分:7)

嗯。我觉得我们在聊天中讨论过的一些细节并没有反映在问题里。

让我用我的"玩具"实现来博你一笑:它附带完整的测试用例,其语法可以识别 <<macros>> 这样的宏,并支持嵌套展开。

显著特征:

  1. 扩展通过回调(process())完成,为您提供最大的灵活性(您可以使用查找表、根据宏的内容让解析失败,甚至产生与输出无关的副作用)
  2. 解析器针对流模式做了优化。关于如何在流模式下解析输入,请参阅 spirit::istream_iterator(Stream-based Parsing Made Easy)。如果您的输入流有 10 GB,而其中只包含 4 个宏,这样做的好处就非常明显——这就是性能慢如爬行(或内存耗尽)与能够正常扩展之间的差别。
    • 请注意,演示仍然会写入字符串缓冲区(通过oss)。但是,您可以轻松地将输出直接挂钩到std::cout或者说std::ofstream实例
  3. 急切地进行扩展,因此您可以使用间接宏获得漂亮的效果。参见测试用例
  4. 我甚至演示了一种支持转义 << >> 分隔符的简单方法(#define SUPPORT_ESCAPES)
  5. 闲话少说:

    守则

    注意:由于偷懒,代码需要 -std=c++0x,但仅在定义了 SUPPORT_ESCAPES 时才需要

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    
    namespace qi = boost::spirit::qi;
    namespace phx= boost::phoenix;
    namespace fsn= boost::fusion;
    
    namespace
    {
        #define SUPPORT_ESCAPES
    
        /// Expands a single macro body in place.
        ///
        /// @param macro  On entry, the text of the macro to expand; on a
        ///               successful return it holds the replacement text.
        /// @return `false` only for the macro "error" (the string is left
        ///         untouched in that case); `true` for everything else.
        ///
        /// Expansion rules, checked in this order:
        ///  - "hello"      -> "bye"
        ///  - "bye"        -> "We meet again"
        ///  - "sideeffect" -> "(done)", after writing a message to std::cerr
        ///  - contains '~' -> the text reversed, with every '~' removed
        ///  - anything else is wrapped in "<<" ... ">>" again
        static bool process(std::string& macro)
        {
            if (macro == "error") {
                return false; // fail the parse
            }

            if (macro == "hello") {
                macro = "bye";
            } else if (macro == "bye") {
                macro = "We meet again";
            } else if (macro == "sideeffect") {
                std::cerr << "this is a side effect while parsing\n";
                macro = "(done)";
            } else if (std::string::npos != macro.find('~')) {
                std::reverse(macro.begin(), macro.end());
                // Erase-remove idiom: std::remove only shifts the kept
                // characters forward, so the whole tail [new_end, end()) has
                // to be erased. The single-iterator erase() overload would
                // drop just one character and leave stale ones behind
                // whenever the text contains more than one '~'.
                macro.erase(std::remove(macro.begin(), macro.end(), '~'),
                            macro.end());
            } else {
                macro = std::string("<<") + macro + ">>"; // this makes the unsupported macros appear unchanged
            }

            return true;
        }
    
        // Grammar that streams its input to an output iterator while expanding
        // <<macro>> sections through process().
        //
        // Iterator: input iterator type handed to qi::grammar.
        // OutIt:    output iterator type; the constructor takes it by reference
        //           and the semantic actions write through phx::ref(out), so the
        //           referenced iterator must stay alive while the grammar is used.
        template<typename Iterator, typename OutIt>
            struct skel_grammar : public qi::grammar<Iterator>
        {
            // Function object used from the semantic actions of `start`: copies
            // the range `r` to the output iterator `o` and always returns true.
            struct fastfwd {
                // Declares bool as the call result type (presumably for the
                // Phoenix function-object protocol -- TODO confirm).
                template<typename,typename> struct result { typedef bool type; };
    
                template<typename R, typename O> 
                    bool operator()(const R&r,O& o) const
                {
    #ifndef SUPPORT_ESCAPES
                    // No escape handling: plain copy, keeping the advanced iterator.
                    o = std::copy(r.begin(),r.end(),o);
    #else
                    auto f = std::begin(r), l = std::end(r);
                    while(f!=l)
                    {
                        // A backslash is skipped (the ++f happens only when *f is
                        // a backslash) and the character after it is copied as-is;
                        // a backslash that is the very last character ends the copy.
                        if (('\\'==*f) && (l == ++f))
                            break;
                        *o++ = *f++;
                    }
    #endif
                    return true; // false to fail the parse
                }
            } copy;
    
            // Builds the rules; `out` is captured by reference in the actions of
            // `start`.
            skel_grammar(OutIt& out) : skel_grammar::base_type(start)
            {
                using namespace qi;
    
                // rawch is "one input character": with SUPPORT_ESCAPES it is a
                // real rule that also accepts a backslash followed by any
                // character; otherwise it is a local macro alias for qi::char_
                // (undefined again at the end of this constructor).
    #ifdef SUPPORT_ESCAPES
                rawch = ('\\' >> char_) | char_;
    #else
    #           define rawch qi::char_
    #endif
    
                // "<<" body ">>", where body is a list of character runs
                // separated by nested macros. Characters and the strings of
                // nested macros are appended to _val; once the closing ">>" has
                // matched, process() rewrites _val in place and its bool result
                // is assigned to _pass.
                macro = ("<<" >> (
                               (*(rawch - ">>" - "<<") [ _val += _1 ]) 
                             % macro                   [ _val += _1 ] // allow nests
                          ) >> 
                          ">>")  
                    [ _pass = phx::bind(process, _val) ];
    
                // Top level: runs of text up to the next "<<", separated by
                // macros. Both the raw text range and each macro's string are
                // written to `out` through the same `copy` functor.
                // NOTE(review): each text run uses `+`, so the input appears to
                // need at least one character before the first macro and between
                // two consecutive top-level macros -- confirm.
                start = 
                    raw [ +(rawch - "<<") ] [ _pass = phx::bind(copy, _1, phx::ref(out)) ] 
                  % macro                   [ _pass = phx::bind(copy, _1, phx::ref(out)) ]
                  ;
    
                BOOST_SPIRIT_DEBUG_NODE(start);
                BOOST_SPIRIT_DEBUG_NODE(macro);
    
    
                // Drop the alias defined above when SUPPORT_ESCAPES is not set.
    #           undef rawch
            }
    
            private:
    #ifdef SUPPORT_ESCAPES
            qi::rule<Iterator, char()> rawch;           // one (possibly escaped) character
    #endif
            qi::rule<Iterator, std::string()> macro;    // attribute: the macro's string
            qi::rule<Iterator> start;                   // entry point, no attribute
        };
    }
    
    int main(int argc, char* argv[])
    {
        std::string input = 
            "Greeting is <<hello>> world!\n"
            "Side effects are <<sideeffect>> and <<other>> vars are untouched\n"
            "Empty <<>> macros are ok, as are stray '>>' pairs.\n"
            "<<nested <<macros>> (<<hello>>?) work>>\n"
            "The order of expansion (evaluation) is _eager_: '<<<<hello>>>>' will expand to the same as '<<bye>>'\n"
            "Lastly you can do algorithmic stuff too: <<!esrever ~ni <<hello>>>>\n"
    #ifdef SUPPORT_ESCAPES // bonus: escapes
            "You can escape \\<<hello>> (not expanded to '<<hello>>')\n"
            "Demonstrate how it <<avoids <\\<nesting\\>> macros>>.\n"
    #endif
            ;
    
        std::ostringstream oss;
        std::ostream_iterator<char> out(oss);
    
        skel_grammar<std::string::iterator, std::ostream_iterator<char> > grammar(out);
    
        std::string::iterator f(input.begin()), l(input.end());
        bool r = qi::parse(f, l, grammar);
    
        std::cout << "parse result: " << (r?"success":"failure") << "\n";
        if (f!=l)
            std::cout << "unparsed remaining: '" << std::string(f,l) << "'\n";
    
        std::cout << "Streamed output:\n\n" << oss.str() << '\n';
    
        return 0;
    }
    

    测试输出

    this is a side effect while parsing
    parse result: success
    Streamed output:
    
    Greeting is bye world!
    Side effects are (done) and <<other>> vars are untouched
    Empty <<>> macros are ok, as are stray '>>' pairs.
    <<nested <<macros>> (bye?) work>>
    The order of expansion (evaluation) is _eager_: 'We meet again' will expand to the same as 'We meet again'
    Lastly you can do algorithmic stuff too: eyb in reverse!
    You can escape <<hello>> (not expanded to 'bye')
    Demonstrate how it <<avoids <<nesting>> macros>>.
    

    这里隐藏着相当多的功能。建议您把测试用例和 process() 回调对照着一起看,弄清楚到底发生了什么。

    干杯 & HTH :)