最近,我在这里问了一个问题: Boost Spirit Segfault In Parser
在这篇文章中,有人指出我正在使用的语法绝对是左递归的,并且它的精神是PEG解析器生成器,这意味着不可能进行左递归。
我使用《龙书》中处理左递归的部分中的规则,将语法转换为非左递归语法。
给出左递归语法
A -> A >> alpha | beta
可以通过以下操作将其转换为等效的右递归语法:
A -> beta >> A'
A' -> alpha >> A' | epsilon
这里是生成的解析器,我认为是非左递归生成:
namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
using boost::phoenix::ref;
start %= functionList >> endList >> qi::eoi;
// different expressions
exp %=
qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
|
qi::token(k_numericTerminal) >> expPrime
|
qi::token(k_trueTok) >> expPrime
|
qi::token(k_falseTok) >> expPrime;
expPrime %=
qi::token(k_equalTo) >> exp >> expPrime
|
qi::token(k_notEq) >> exp >> expPrime
|
qi::token(k_less) >> exp >> expPrime
|
qi::token(k_lessEq) >> exp >> expPrime
|
qi::token(k_greater) >> exp >> expPrime
|
qi::token(k_greaterEq) >> exp >> expPrime
|
qi::token(k_andTok) >> exp >> expPrime
|
qi::token(k_orTok) >> exp >> expPrime
|
qi::token(k_notTok) >> exp
|
qi::token(k_plues) >> exp >> expPrime
|
qi::token(k_minus) >> exp >> expPrime
|
qi::token(k_mult) >> exp >> expPrime
|
qi::token(k_minus) >> exp
|
qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::eps;
// parameter list
paramList %= exp >> paramListPrime;
paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
|
qi::eps;
// return statements
returnStatement %= qi::token(k_returnTok) >> exp
|
qi::token(k_returnTok);
// function call statements
callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);
// variable assignment
assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;
// list of integers
intList %= qi::token(k_numericTerminal) >> intListPrime;
intListPrime %=
qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
|
qi::eps;
// print out a variable
printStatement %= qi::token(k_print) >> exp;
// take input
inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);
// conditional statement
conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;
// consitions have optional else
optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
|
qi::eps;
// while loop
whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);
// actual program statements
endList %= end >> endListPrime;
endListPrime %= end >> endListPrime
|
qi::eps;
// end possibilities of program in global space
end %= callStatement
|
printStatement
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp;
// function parameters
param %=
qi::token(k_alphaTerminal) >> paramPrime
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
>> paramPrime;
// for handling left recursion in paramlist
paramPrime %=
qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
|
qi::eps;
// define a statement as assignment print input condition while or call
statement %=
assignmentStatement
|
printStatement
|
inputStatement
|
conditionStatement
|
whileStatement
|
callStatement
|
returnStatement;
// general statement list
statements %= statement >> statementsPrime;
// for handling left recursion in statements
statementsPrime %= statement >> statementsPrime
|
qi::eps;
// functions
functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> param >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
|
qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
| qi::eps;
BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
debug(start);
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
这是词法分析器
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
namespace interpreter
{
namespace lex = boost::spirit::lex;
enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_alphaTerminal = 34,
k_numericTerminal = 35
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9]*"),
numericTerminal("[0-9]*")
{
this->self("WHITESPACE") = whiteSpace;
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};
这是我的示例测试程序
int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
interpreter::LexerTokens< lexer_type > lexer;
interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)");
char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}
这些修订引起了一些问题,首先,通过非正式地查看语法,而不构造完整的LL(1)解析表,我认为该语法不是LL(1)。我仍然需要验证这一点,但我想知道精神能否解析此语法?我知道PEG通常使用/运算符进行超前查询,请问Spirit目前是否这样做?我在另一篇文章中读到,精神可能不会吗?
第二,该语法失败了,我注意到在开始制作中简化并制作开始制作时:
start %= functionList;
,然后将输入更改为:
def print_it(x, y): print 3*x + y return fed
语法调试语句指出解析成功。但是,我看到剩下的字符串是:
print_it(x, y): print 3*x + y return fed
因此只有第一个令牌实际被解析了。经过一番调试后,我不确定为什么解析成功,为什么只使用一个符号,这可能是词法分析器的问题吗?
此外,当我将开始制作更改为:
start %= endList;
并使用输入
y = x
但是,它无法解析,仅使用字符y。
最后,我的debug语句的输出不是很有帮助,当与debug语句一起运行时,产生的输出为:
<start>
<try>[][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0
我假设是指从二十个空的[]中尝试语法中的二十个产生式,这是一个正确的假设吗?又为什么[]为空?我通常会看到一些对调试有用的文本。是否因为匹配了正则表达式而使语法自动成功?如果是这样,是否有一种方法可以使debug语句在使用令牌枚举令牌而不是使用宏添加表达式时打印有用的输出?
感谢任何帮助或指出正确方向,谢谢。
答案 0 :(得分:2)
我假设是说从20个空的[]中尝试了20个乘积,这是一个正确的假设吗?
不。 []
表示输入令牌。
为什么[]为空?
可能没有有用的方法来打印它们,因此它们显示为空。
如果是这种情况,那么使用令牌枚举令牌而不是使用宏添加表达式时,是否有一种方法可以使debug语句打印出有用的输出?
我会这样认为。但是我从不使用Lex。因此,可能需要一段时间才能弄清楚。
引起我注意的第一件事是:
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
您的AttributeTypes说omit
。确实,更改为
typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;
确实显示出生命迹象。对于输入x=y
(无空格!),它将输出:
<start>
<try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining
R is false
现在,对于def print_it(x, y): print 3*x + y return fed
输入,输出仍然是:
<start>
<try>[def][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed
R is false
更多信息。有趣的是,它似乎在第一个空白处也失败了。 whiteSpace
正则表达式看起来还不错,所以我searched for lex in_state
in my answers记得我曾经学到的有关跳过和Lex的知识。
我玩了一些建议。在第二篇文章之后,我对此发表了评论:
好!您可以添加到答案中,在词法分析器内部具有一个单独的状态以跳过空白还有另一个相当大的缺点:缺乏调试支持。如果使用单独的状态进行跳过,然后使用BOOST_SPIRIT_DEBUG_NODE,则不会获得令牌流的完整输出。这是因为默认的调试处理程序使词法分析器迭代器前进以获得令牌,词法分析器当然处于INITIAL状态。一旦它遇到空白,迭代就会停止,因为词法分析器找不到匹配项。令牌流将在规则跟踪的第一个空白处被剪切。 – noxmetus Sep 20 '17 at 18:28
为了好玩,让我们尝试不使用单独的状态,而是使用Troubles with boost::spirit::lex & whitespace中的状态。
现在,表达式无法解析,因为
print_it
不是有效的标识符。让我们将_
添加到alphaTerminal
alphaTerminal
要求=
遵循。修正表达式:
exp = (
qi::token(k_alphaTerminal)
| qi::token(k_numericTerminal)
| qi::token(k_trueTok)
| qi::token(k_falseTok))
>> expPrime
;
实际上,这些令牌ID无效。您需要以min_token_id
(即0x10000)开头:
enum Tokens
{
k_andTok = lex::tokenids::min_token_id + 1,
k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
k_alphaTerminal, k_numericTerminal,
};
您为什么还要指定令牌ID?从您将对标记定义的引用传递给解析器的事实进行猜测,我认为您将要使用它们?
exp
= (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
;
此外,让我们为所有规则启用调试:
BOOST_SPIRIT_DEBUG_NODES(
(start) (functionList) (endList) (endListPrime) (param)
(paramPrime) (paramList) (paramListPrime) (statements)
(statementsPrime) (statement) (assignmentStatement)
(printStatement) (inputStatement) (conditionStatement)
(whileStatement) (callStatement) (returnStatement) (exp)
(expPrime) (intList) (intListPrime) (optionalElse) (end)
)
而且,在花了太多时间试图理解为什么状态切换船长似乎不起作用之后,我放弃了。这是我修修补补的结果:
//#define USE_STATES
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace lex = boost::spirit::lex;
namespace interpreter {
enum Tokens
{
k_andTok = lex::tokenids::min_token_id + 1,
k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
k_alphaTerminal, k_numericTerminal,
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9_]*"),
numericTerminal("[0-9]*")
{
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);
#ifdef USE_STATES
this->self("WHITESPACE") = whiteSpace;
#else
this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
#endif
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
start
= functionList >> endList >> qi::eoi
;
// different expressions
exp
= (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
;
expPrime
= tok.equalTo >> exp >> expPrime
| tok.notEq >> exp >> expPrime
| tok.less >> exp >> expPrime
| tok.lessEq >> exp >> expPrime
| tok.greater >> exp >> expPrime
| tok.greaterEq >> exp >> expPrime
| tok.andTok >> exp >> expPrime
| tok.orTok >> exp >> expPrime
| tok.notTok >> exp
| tok.plus >> exp >> expPrime
| tok.minus >> exp >> expPrime
| tok.mult >> exp >> expPrime
| tok.minus >> exp
| tok.leftParen >> exp >> tok.rightParen
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
| tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
| qi::eps
;
// parameter list
paramList
= exp >> paramListPrime
;
paramListPrime
= tok.comma >> exp >> paramListPrime
| qi::eps
;
// return statements
returnStatement
= tok.returnTok >> exp
| tok.returnTok
;
// function call statements
callStatement
= tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
;
// variable assignment
assignmentStatement
= tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.leftBracket >> exp
>> tok.rightBracket >> tok.assign >> exp
;
// list of integers
intList
= tok.numericTerminal >> intListPrime
;
intListPrime
= tok.comma >> tok.numericTerminal >> intListPrime
| qi::eps
;
// print out a variable
printStatement
= tok.print >> exp
;
// take input
inputStatement
= tok.alphaTerminal >> tok.input
;
// conditional statement
conditionStatement
= tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
;
// consitions have optional else
optionalElse
= tok.elseTok >> tok.colon >> statements
| qi::eps
;
// while loop
whileStatement
= tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
;
// actual program statements
endList
= end >> endListPrime
;
endListPrime
= end >> endListPrime
| qi::eps
;
// end possibilities of program in global space
end
= callStatement
| printStatement
| tok.alphaTerminal >> tok.assign >> tok.input
| tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
>> tok.rightBracket
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
>> tok.assign >> exp
;
// function parameters
param
= tok.alphaTerminal >> paramPrime
| tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
>> paramPrime
;
// for handling left recursion in paramlist
paramPrime
= tok.comma >> tok.alphaTerminal >> paramPrime
| qi::eps
;
// define a statement as assignment print input condition while or call
statement
= assignmentStatement
| printStatement
| inputStatement
| conditionStatement
| whileStatement
| callStatement
| returnStatement
;
// general statement list
statements
= statement >> statementsPrime
;
// for handling left recursion in statements
statementsPrime
= statement >> statementsPrime
| qi::eps
;
// functions
functionList
= tok.def >> tok.alphaTerminal >> tok.leftParen
>> param >> tok.rightParen >> tok.colon
>> statements >> tok.fed
| tok.def >> tok.alphaTerminal >> tok.leftParen
>> tok.rightParen >> tok.colon >> statements >> tok.fed
| qi::eps
;
BOOST_SPIRIT_DEBUG_NODES(
(start) (functionList) (endList) (endListPrime) (param)
(paramPrime) (paramList) (paramListPrime) (statements)
(statementsPrime) (statement) (assignmentStatement)
(printStatement) (inputStatement) (conditionStatement)
(whileStatement) (callStatement) (returnStatement) (exp)
(expPrime) (intList) (intListPrime) (optionalElse) (end)
)
}
private:
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
#include <fstream>
#include <iterator>
int main(int argc, char** argv) {
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
#ifdef USE_STATES
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
#else
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::unused_type skipper_type;
#endif
interpreter::LexerTokens<lexer_type> lexer;
interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);
// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}
std::ifstream t(argv[1]);
std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };
char const* first = sourceCode.data();
char const* last = first + sourceCode.size();
#ifdef USE_STATES
bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
#else
bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
#endif
std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
std::cout << "R is " << std::boolalpha << ok << std::endl;
}
打印
<start>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<functionList>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<param>
<try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[)][:][print][3][*][x][+][y][return][fed]</try>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</param>
<statements>
<try>[print][3][*][x][+][y][return][fed]</try>
<statement>
<try>[print][3][*][x][+][y][return][fed]</try>
<assignmentStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<exp>
<try>[3][*][x][+][y][return][fed]</try>
<expPrime>
<try>[*][x][+][y][return][fed]</try>
<exp>
<try>[x][+][y][return][fed]</try>
<expPrime>
<try>[+][y][return][fed]</try>
<exp>
<try>[y][return][fed]</try>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<success>[return][fed]</success>
<attributes>[]</attributes>
</printStatement>
<success>[return][fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[return][fed]</try>
<statement>
<try>[return][fed]</try>
<assignmentStatement>
<try>[return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[return][fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[return][fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[return][fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[return][fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[return][fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[return][fed]</try>
<exp>
<try>[fed]</try>
<fail/>
</exp>
<success>[fed]</success>
<attributes>[]</attributes>
</returnStatement>
<success>[fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[fed]</try>
<statement>
<try>[fed]</try>
<assignmentStatement>
<try>[fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[fed]</try>
<fail/>
</returnStatement>
<fail/>
</statement>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statements>
<success></success>
<attributes>[]</attributes>
</functionList>
<endList>
<try></try>
<end>
<try></try>
<callStatement>
<try></try>
<fail/>
</callStatement>
<printStatement>
<try></try>
<fail/>
</printStatement>
<fail/>
</end>
<fail/>
</endList>
<fail/>
</start>
Remaining ''
R is false