如何使boost :: spirit解析器和词法分析器能够处理包含文件

时间:2017-11-06 21:58:24

标签: c++ boost-spirit

这是一个什么都不做的词法分析器和解析器——它只是原样返回读入的字符串。我希望扩展它,使其能够处理类似 C++ 的 include 语句。我大致能想到该怎么做,但想知道是否有更简单或现成的办法。如果必须自己实现,我会编写自己的迭代器(传递给词法分析器)。这个迭代器将包含:

  • 字符串的索引(可能使用-1表示end()迭代器)
  • 指向此字符串的指针

遇到一些include语句的词法分析器会将文件插入到当前位置的字符串中,覆盖include语句。 你会怎么做?

这是我的do-nothing lexer / parser:

#include <boost/phoenix.hpp>
#include <boost/bind.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

namespace lex     = boost::spirit::lex;
namespace qi      = boost::spirit::qi;
namespace phoenix = boost::phoenix;


// Token definitions for the pass-through lexer.
//
// Only one token is registered: m_sChar, built from the regex "." and
// carrying a char attribute. A token for include directives (pattern
// ^#include "[^"]*") is not registered in this version.
template<typename Lexer>
class lexer : public lex::lexer<Lexer>
{
public:
    using char_token_type = lex::token_def<char>;

    // Single-character token exposed to the grammar.
    char_token_type m_sChar;

    lexer()
        : m_sChar(".")
    {
        // Add the token definition to the lexer's token set.
        this->self += m_sChar;
    }
};

// Grammar that collects every character token into one std::string.
//
// The start rule accepts zero or more m_sChar tokens followed by end of
// input; its synthesized attribute is std::string.
template<typename Iterator>
class grammar : public qi::grammar<Iterator, std::string()>
{
public:
    // Start rule; also the rule handed to the qi::grammar base.
    qi::rule<Iterator, std::string()> m_sStart;

    // tokenDefs: the lexer object whose m_sChar token definition is used as
    // the terminal of the rule.
    template<typename TokenDefs>
    explicit grammar(TokenDefs const& tokenDefs)
        : grammar::base_type(m_sStart)
    {
        m_sStart %= *tokenDefs.m_sChar >> qi::eoi;
    }
};


// Runs the pass-through lexer/grammar over a fixed in-memory input.
//
// Returns 0 when lex::tokenize_and_parse reports success and 1 otherwise
// (the result used to be discarded, so a failed parse went unnoticed).
int main(int, char**)
{
    typedef lex::lexertl::token<std::string::const_iterator, boost::mpl::vector<char> > token_type;
    typedef lexer<lex::lexertl::actor_lexer<token_type> > expression_lexer_type;
    typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
    typedef grammar<expression_lexer_iterator_type> expression_grammar_type;

    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);
    // Sample input: one line of text followed by an include directive.
    const std::string s_ac = "this is a test\n\
#include \"test.dat\"\n\
";
    std::string s;    // receives the grammar's std::string attribute
    auto pBegin = std::begin(s_ac);
    if (!lex::tokenize_and_parse(pBegin, std::end(s_ac), lexer, grammar, s))
    {   std::cerr << "tokenize_and_parse failed" << std::endl;
        return 1;
    }
    return 0;
}

2 个答案:

答案 0 :(得分:1)

首先,已经存在一个基于 Spirit 的预处理器:Boost Wave(另见 How do I implement include directives using boost::spirit::lex?)。

其次,“将包含文件的内容插入到字符串值中”既无用(就词法分析而言),也非常低效:

  • 它没用,因为包含文件会形成一个令牌(!?),这意味着你的解析器无法对包含的内容采取行动
  • 它不是通用的,因为嵌套包含不会以这种方式发生
  • 即使目标只是把被包含的文件逐字复制到等效的输出流中,先把内容完整复制到内存、再经由词法分析器复制到解析器、最后只是为了把它流式输出,这样做的效率也非常低!您完全可以用最少的内存分配把输入流直接导入输出流。

我建议以下任意组合:

  • 关注点分离:不要将解析与解释混为一谈。因此,如果您要解析 include 指令,就返回该 include 语句的一个表示,然后把它传递给负责解释它的代码。

  • 关注点分离的一个特殊的、更强的形式,是将包含处理移到预处理阶段。实际上,自定义迭代器类型可以做到这一点,但我会把词法分析器构建在它之上,这样词法分析器就不必了解 include,而只需对源文本进行词法分析,无须知道其确切来源。

答案 1 :(得分:1)

下面的代码用 "abcd" 替换 include 语句——实际上应该替换为文件的内容……

#include <boost/phoenix.hpp>
#include <boost/bind.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/spirit/include/qi_char_class.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/mpl/index_of.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include <iterator>


namespace lex     = boost::spirit::lex;
namespace qi      = boost::spirit::qi;
namespace phoenix = boost::phoenix;

/// Index-based iterator over a std::string that the lexer may modify.
///
/// The iterator stores a pointer to the string object and an index into it
/// instead of a pointer into the character buffer. The end position is the
/// fixed sentinel index returned by endPos(), so an end iterator compares
/// equal to any iterator that ran off the string no matter how the string's
/// length has changed in between.
///
/// NOTE(review): the advertised category is random access, but only
/// dereference, equality and increment are implemented; the tag is kept
/// unchanged so existing users of iterator_category see the same type.
struct myIterator
{   // Same member types the former std::iterator<random_access_iterator_tag,
    // char> base class provided (that base is deprecated since C++17).
    using iterator_category = std::random_access_iterator_tag;
    using value_type = char;
    using difference_type = std::string::difference_type;
    using pointer = char *;
    using reference = char &;

    std::string *m_p;       ///< string iterated over; nullptr when default-constructed
    std::size_t m_iPos;     ///< current index, or endPos() for the end iterator

    /// Sentinel index that marks the end iterator.
    static std::size_t endPos(void)
    {   return ~std::size_t(0);
    }
    /// Creates an end iterator that is not attached to any string.
    myIterator(void)
        :m_p(nullptr),
        m_iPos(endPos())
    {
    }
    /// Creates an iterator over _r.
    /// @param _r     string to iterate over; only its address is stored
    /// @param _bEnd  true for the end iterator, false for the first character
    ///
    /// A begin iterator over an empty string is the end iterator: index 0
    /// would never compare equal to the sentinel and dereferencing it throws.
    myIterator(std::string &_r, const bool _bEnd = false)
        :m_p(&_r),
        m_iPos((_bEnd || _r.empty()) ? endPos() : 0)
    {
    }
    // Copy construction and assignment are the implicitly generated
    // member-wise versions.

    /// Returns the character at the current index.
    /// @throws std::out_of_range (from std::string::at) when the index is not
    ///         inside the string, e.g. for the end iterator.
    const char &operator*(void) const
    {   return m_p->at(m_iPos);
    }
    /// Two iterators are equal when they refer to the same string object and
    /// hold the same index.
    bool operator==(const myIterator &_r) const
    {   return m_p == _r.m_p && m_iPos == _r.m_iPos;
    }
    bool operator!=(const myIterator &_r) const
    {   return !(*this == _r);
    }
    /// Advances by one character and snaps to the end sentinel once the index
    /// is no longer inside the string. ">=" rather than "==" so the end is
    /// also detected after the string has been shortened behind the index.
    myIterator &operator++(void)
    {   ++m_iPos;
        if (m_iPos >= m_p->size())
            m_iPos = endPos();
        return *this;
    }
    /// Post-increment: advances and returns the previous position.
    myIterator operator++(int)
    {   const myIterator s(*this);
        operator++();
        return s;
    }
};
// Lexer semantic action for the include token.
//
// Replaces the matched text [_rStart, _rEnd) in the underlying string with
// the placeholder "abcd" (standing in for the included file's contents) and
// moves _rEnd back to _rStart. Returns lex::pass_flags::pass_ignore.
struct include
{
    auto operator()(myIterator &_rStart, myIterator &_rEnd) const
    {
        std::string &text = *_rStart.m_p;
        const std::size_t matchPos = _rStart.m_iPos;
        const std::size_t matchLen = _rEnd.m_iPos - matchPos;

        // Swap the matched directive for the replacement text in one step.
        text.replace(matchPos, matchLen, "abcd");

        // Presumably so that scanning resumes at the first inserted
        // character -- confirm against the Spirit.Lex semantic-action docs.
        _rEnd = _rStart;
        return lex::pass_flags::pass_ignore;
    }
};
// Token definitions for the lexer that rewrites include directives.
//
// Two tokens are registered, in this order: m_sInclude, whose semantic
// action assigns the result of the include functor to lex::_pass, and
// m_sChar, built from the regex ".".
template<typename Lexer>
class lexer : public lex::lexer<Lexer>
{
public:
    using char_token_type = lex::token_def<char>;

    // Single-character token exposed to the grammar.
    char_token_type m_sChar;
    // Include directive; declared with lex::omit, i.e. without an attribute.
    lex::token_def<lex::omit> m_sInclude;

    lexer()
        : m_sChar(".")
        , m_sInclude("#include [\"][^\"]*[\"]")
    {
        this->self += m_sInclude[lex::_pass = boost::phoenix::bind(include(), lex::_start, lex::_end)]
                    | m_sChar;
    }
};

// Grammar that concatenates all character tokens into a std::string.
//
// m_sStart matches any number of m_sChar tokens and then requires end of
// input (qi::eoi).
template<typename Iterator>
class grammar : public qi::grammar<Iterator, std::string()>
{
public:
    // Start rule passed to the qi::grammar base class.
    qi::rule<Iterator, std::string()> m_sStart;

    // lexerTokens: object providing the m_sChar token definition.
    template<typename LexerTokens>
    explicit grammar(LexerTokens const& lexerTokens)
        : grammar::base_type(m_sStart)
    {
        m_sStart %= *lexerTokens.m_sChar >> qi::eoi;
    }
};


// Runs the include-rewriting lexer and the grammar over a fixed input
// string, using myIterator so the lexer action can edit the input in place.
//
// Returns 0 when lex::tokenize_and_parse reports success and 1 otherwise
// (the result used to be discarded, so a failed parse went unnoticed).
int main(int, char**)
{
    typedef lex::lexertl::token<myIterator, boost::mpl::vector<char> > token_type;
    typedef lexer<lex::lexertl::actor_lexer<token_type> > expression_lexer_type;
    typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
    typedef grammar<expression_lexer_iterator_type> expression_grammar_type;

    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);
    // Not const: the include action erases from and inserts into this string.
    std::string s_ac = "this is a test\n\
#include \"test.dat\"\n\
";
    std::string s;    // receives the grammar's std::string attribute
    myIterator pBegin(s_ac);
    if (!lex::tokenize_and_parse(pBegin, myIterator(s_ac, true), lexer, grammar, s))
    {   std::cerr << "tokenize_and_parse failed" << std::endl;
        return 1;
    }
    return 0;
}