这是一个什么都不做（do-nothing）的词法分析器和解析器——它只返回读入的字符串。我希望对它进行扩展，使其能够处理类似 C++ 的 include 语句。我大致能想到该怎么做，但想知道是否有更简单或现成的办法。如果必须自己动手，我会实现一个自定义迭代器（传递给词法分析器）。这个迭代器将包含一个指向字符串的指针，以及该字符串中的当前位置（索引）。
词法分析器在遇到 include 语句时，会把被包含文件的内容插入到字符串的当前位置，并覆盖该 include 语句。你会怎么做？
这是我的do-nothing lexer / parser:
#include <boost/phoenix.hpp>
#include <boost/bind.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
/// Minimal Spirit.Lex lexer with a single token definition.
///
/// Every match of the pattern "." is produced as an `m_sChar` token whose
/// attribute type is `char`. Include handling is sketched in comments only
/// and is not active.
///
/// @tparam Lexer  concrete lexer backend this class derives from via
///                `lex::lexer<Lexer>`.
template <typename Lexer>
class lexer : public lex::lexer<Lexer>
{
public:
    /// Token definition carrying a `char` attribute.
    using char_token_type = lex::token_def<char>;

    /// Sets up the token pattern and registers it with the lexer.
    lexer()
        : m_sChar(".")
    {
        this->self += m_sChar;
    }

    /// The one and only token; referenced by the grammar.
    char_token_type m_sChar;

    // Disabled sketch of an include token (kept for reference):
    //   lex::token_def<lex::omit> m_sInclude;
    //   initialised with "^#include \"[^\"]*\""
};
/// Qi grammar that runs on the lexer's token stream and exposes a
/// `std::string` attribute.
///
/// @tparam Iterator  token iterator type supplied by the lexer.
template <typename Iterator>
class grammar : public qi::grammar<Iterator, std::string()>
{
public:
    /// Builds the start rule from the lexer's token definitions.
    ///
    /// @param tokens  lexer object; only its `m_sChar` member is used.
    template <typename Tokens>
    explicit grammar(const Tokens& tokens)
        : grammar::base_type(m_sStart)
    {
        // Any number of char tokens, then end of input. `%=` is used so the
        // rule's attribute is filled from the right-hand side.
        m_sStart %= *tokens.m_sChar >> qi::eoi;
    }

    /// Start rule of the grammar.
    qi::rule<Iterator, std::string()> m_sStart;
};
/// Demo driver: tokenizes and parses a fixed two-line input with the
/// do-nothing lexer/grammar defined above.
///
/// @return 0 when `lex::tokenize_and_parse` reports success, 1 otherwise.
int main(int, char**)
{
    using token_type = lex::lexertl::token<std::string::const_iterator, boost::mpl::vector<char> >;
    using expression_lexer_type = lexer<lex::lexertl::actor_lexer<token_type> >;
    using expression_lexer_iterator_type = expression_lexer_type::iterator_type;
    using expression_grammar_type = grammar<expression_lexer_iterator_type>;

    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);

    // Same bytes as before; adjacent literals avoid backslash-newline
    // continuations, which would pick up any indentation of the next line.
    const std::string s_ac =
        "this is a test\n"
        "#include \"test.dat\"\n";

    std::string s;                    // receives the grammar's attribute
    auto pBegin = std::begin(s_ac);   // passed as an lvalue to tokenize_and_parse

    // The result used to be discarded, so a failed parse went unnoticed.
    if (!lex::tokenize_and_parse(pBegin, std::end(s_ac), lexer, grammar, s))
    {
        std::cerr << "tokenize_and_parse failed\n";
        return 1;
    }
    return 0;
}
答案 0 :(得分:1)
首先，已经有一个基于 Boost.Spirit 的预处理器：Boost Wave（另见“How do I implement include directives using boost::spirit::lex?”）
其次，“把被包含文件的内容插入到字符串值中”这种做法既没有用处（就词法分析而言），也非常低效。
我建议以下任意组合:
关注点分离：不要把解析与解释（执行）混为一谈。因此，如果你要解析 include 指令，就应当返回该 include 语句的某种表示，然后再把它交给负责解释它的代码。
关注点分离的一个特殊且更彻底的做法，是把 include 处理移到预处理阶段。实际上，自定义迭代器类型确实可以做到这一点，但我会把词法分析器构建在它之上：这样词法分析器不必了解 include，只需对源文本进行词法分析，而无需知道文本的确切来源。
答案 1 :(得分:1)
下面的代码用 "abcd" 替换 include 语句——实际应替换为被包含文件的内容……
#include <boost/phoenix.hpp>
#include <boost/bind.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/spirit/include/qi_char_class.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/mpl/index_of.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include <iterator>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
/// Index-based iterator over a `std::string` owned elsewhere.
///
/// The iterator stores a pointer to the string plus a position instead of a
/// raw character pointer, so it can still be compared and advanced after the
/// string's contents have been edited. The end iterator is represented by the
/// position `std::string::npos`; a default-constructed iterator has a null
/// string pointer and the end position.
///
/// NOTE(review): the category is declared as random access (unchanged from
/// the original), but only dereference, comparison and increment are
/// implemented.
struct myIterator
{
    // Member aliases replace the std::iterator base class, which is
    // deprecated since C++17; the exposed types are the same.
    using iterator_category = std::random_access_iterator_tag;
    using value_type = char;
    using difference_type = std::string::difference_type;
    using pointer = char *;
    using reference = char &;

    std::string *m_p = nullptr;                 ///< string being iterated (not owned)
    std::size_t m_iPos = std::string::npos;     ///< current index, npos == end

    /// Creates a detached end iterator (no string attached).
    myIterator() = default;

    /// Creates a begin or end iterator for `_r`.
    ///
    /// @param _r     string to iterate; must outlive the iterator.
    /// @param _bEnd  true for the end iterator, false for begin.
    ///
    /// For an empty string the begin iterator is the end iterator, so that
    /// `begin == end` holds and the string is never dereferenced.
    myIterator(std::string &_r, const bool _bEnd = false)
        : m_p(&_r),
          m_iPos((_bEnd || _r.empty()) ? std::string::npos : 0)
    {
    }

    // Copy construction/assignment are the compiler-generated member-wise
    // versions; nothing is owned, so no custom ones are needed.

    /// @return the character at the current position.
    /// @throws std::out_of_range (from `std::string::at`) when the position
    ///         is not a valid index, e.g. for the end iterator.
    const char &operator*(void) const
    {
        return m_p->at(m_iPos);
    }

    /// Two iterators are equal when they refer to the same string object and
    /// the same position.
    bool operator==(const myIterator &_r) const
    {
        return m_p == _r.m_p && m_iPos == _r.m_iPos;
    }

    bool operator!=(const myIterator &_r) const
    {
        return !(*this == _r);
    }

    /// Pre-increment: moves to the next character, or to the end position
    /// once the index is no longer inside the string.
    myIterator &operator++(void)
    {
        ++m_iPos;
        // `>=` rather than `==`: if the string was shortened since this
        // iterator was created, the index may already be past the end.
        if (m_iPos >= m_p->size())
            m_iPos = std::string::npos;
        return *this;
    }

    /// Post-increment: advances and returns the previous state.
    myIterator operator++(int)
    {
        const myIterator previous(*this);
        ++(*this);
        return previous;
    }
};
/// Lexer semantic-action functor for a matched include statement.
///
/// Replaces the matched text inside the underlying string with the
/// placeholder "abcd" (standing in for the included file's contents).
struct include
{
    /// @param _rStart  iterator at the first character of the match.
    /// @param _rEnd    iterator one past the match; reset to `_rStart` on
    ///                 return (presumably so that lexing resumes at the
    ///                 start of the inserted text).
    /// @return `lex::pass_flags::pass_ignore`, which the caller assigns to
    ///         `lex::_pass`.
    auto operator()(myIterator &_rStart, myIterator &_rEnd) const
    {
        std::string &text = *_rStart.m_p;
        const std::size_t matchBegin = _rStart.m_iPos;
        const std::size_t matchLength = _rEnd.m_iPos - matchBegin;

        // Drop the include statement and put the replacement in its place.
        text.replace(matchBegin, matchLength, "abcd");

        _rEnd = _rStart;
        return lex::pass_flags::pass_ignore;
    }
};
/// Spirit.Lex lexer with two token definitions: an include statement and a
/// single-character token.
///
/// A match of `m_sInclude` runs the `include` functor on the match's start
/// and end iterators and stores its result in `lex::_pass`; everything else
/// is produced as `m_sChar` tokens.
///
/// @tparam Lexer  concrete lexer backend this class derives from via
///                `lex::lexer<Lexer>`.
template <typename Lexer>
class lexer : public lex::lexer<Lexer>
{
public:
    /// Token definition carrying a `char` attribute.
    using char_token_type = lex::token_def<char>;

    /// Single-character token; referenced by the grammar.
    char_token_type m_sChar;
    /// Include statement token; carries no attribute (`lex::omit`).
    lex::token_def<lex::omit> m_sInclude;

    /// Sets up both patterns and registers them, include first.
    lexer()
        : m_sChar("."),
          m_sInclude("#include [\"][^\"]*[\"]")
    {
        this->self +=
            m_sInclude[lex::_pass = boost::phoenix::bind(include(), lex::_start, lex::_end)]
            | m_sChar;
    }
};
/// Qi grammar consuming the lexer's tokens and producing a `std::string`.
///
/// @tparam Iterator  token iterator type supplied by the lexer.
template <typename Iterator>
class grammar : public qi::grammar<Iterator, std::string()>
{
public:
    /// Defines the start rule in terms of the lexer's token definitions.
    ///
    /// @param tokens  lexer object; only its `m_sChar` member is used.
    template <typename Tokens>
    explicit grammar(const Tokens& tokens)
        : grammar::base_type(m_sStart)
    {
        // Zero or more char tokens followed by end of input; `%=` is used so
        // the rule's attribute is filled from the right-hand side.
        m_sStart %= *tokens.m_sChar >> qi::eoi;
    }

    /// Start rule of the grammar.
    qi::rule<Iterator, std::string()> m_sStart;
};
/// Demo driver: tokenizes and parses a fixed two-line input through
/// `myIterator`, so the lexer's include action can edit the input string
/// while it is being lexed.
///
/// @return 0 when `lex::tokenize_and_parse` reports success, 1 otherwise.
int main(int, char**)
{
    using token_type = lex::lexertl::token<myIterator, boost::mpl::vector<char> >;
    using expression_lexer_type = lexer<lex::lexertl::actor_lexer<token_type> >;
    using expression_lexer_iterator_type = expression_lexer_type::iterator_type;
    using expression_grammar_type = grammar<expression_lexer_iterator_type>;

    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);

    // Non-const on purpose: the include action rewrites this string in place.
    // Same bytes as before; adjacent literals avoid backslash-newline
    // continuations, which would pick up any indentation of the next line.
    std::string s_ac =
        "this is a test\n"
        "#include \"test.dat\"\n";

    std::string s;              // receives the grammar's attribute
    myIterator pBegin(s_ac);    // passed as an lvalue to tokenize_and_parse

    // The result used to be discarded, so a failed parse went unnoticed.
    if (!lex::tokenize_and_parse(pBegin, myIterator(s_ac, true), lexer, grammar, s))
    {
        std::cerr << "tokenize_and_parse failed\n";
        return 1;
    }
    return 0;
}