我正在尝试用boost :: spirit库编写一个python解析器。这是代码:
template <typename Iterator>
class Parser : public qi::grammar<Iterator, space_type>
{
public:
Parser() : Parser::base_type(small_stmt)
{
NEWLINE = lit("<NEWLINE>");
INDENT = lit("<INDENT>");
DEDENT = lit("<DEDENT>");
ENDMARKER = lit("<EOT>");
NAME = identifier.alias();
NUMBER = integer|longinteger|floatnumber|imagnumber;
STRING = stringliteral.alias();
identifier = (alpha | '_') >> *(alpha | digit | '_');
stringliteral = -stringprefix >> (shortstring | longstring);
stringprefix = lit("r") | lit("u") | lit("ur") | lit("R") | lit("U") | lit("UR") | lit("Ur") | lit("uR") | lit("b") | lit("B") | lit("br") | lit("Br") | lit("bR") | lit("BR");
shortstring = "'" >> *(shortstringitem - "'") >> "'" | "\"" >> *(shortstringitem - "\"") >> "\"";
longstring = "'''" >> *longstringitem >> "'''" | "\"\"\"" >> *longstringitem >> "\"\"\"";
shortstringitem = shortstringchar | escapeseq;
longstringitem = longstringchar | escapeseq;
shortstringchar = char_ - "\\" - "\n";
longstringchar = char_ - "\\";
escapeseq = '\\' >> char_;
longinteger = integer >> (lit("l") | lit("L"));
integer = decimalinteger | octinteger | hexinteger | bininteger;
decimalinteger = nonzerodigit >> *digit | lit("0");
octinteger = lit("0") >> (lit("o") | lit("O")) >> +octdigit | lit("0") >> +octdigit;
hexinteger = lit("0") >> (lit("x") | lit("X")) >> +hexdigit;
bininteger = lit("0") >> (lit("b") | lit("B")) >> +bindigit;
nonzerodigit = char_('1', '9');
octdigit = char_('0', '7');
bindigit = lit("0") | lit("1");
hexdigit = digit | char_('a', 'f') | char_('A', 'F');
floatnumber = pointfloat | exponentfloat;
pointfloat = -intpart >> fraction | intpart >> ".";
exponentfloat = (intpart | pointfloat) >> exponent;
intpart = +digit;
fraction = "." >> +digit;
exponent = (lit("e") | lit("E")) >> -(lit("+") | lit("-")) >> +digit;
imagnumber = (floatnumber | intpart) >> (lit("j") | lit("J"));
single_input = NEWLINE|simple_stmt|compound_stmt >> NEWLINE;
file_input = *(NEWLINE|stmt) >> ENDMARKER;
eval_input = testlist >> *NEWLINE >> ENDMARKER;
decorator = lit("@") >> dotted_name >> -( lit("(") >> -(arglist) >> lit(")") ) >> NEWLINE;
decorators = +decorator;
decorated = decorators >> (classdef|funcdef);
funcdef = lit("def") >> NAME >> parameters >> lit(":") >> suite;
parameters = lit("(") >> -(varargslist) >> lit(")");
varargslist = (*(fpdef >> -(lit("=") >> test) >> lit(",")) >> (lit("*") >> NAME >> -(lit(",") >> lit("**") >> NAME)|lit("**") >> NAME)|fpdef >> -(lit("=") >> test) >> *(lit(",") >> fpdef >> -(lit("=") >> test)) >> -(lit(",")));
fpdef = NAME|lit("(") >> fplist >> lit(")");
fplist = fpdef >> *(lit(",") >> fpdef) >> -(lit(","));
stmt = simple_stmt|compound_stmt;
simple_stmt = small_stmt >> *(lit(";") >> small_stmt) >> -(lit(";")) >> NEWLINE;
small_stmt = (expr_stmt|print_stmt|del_stmt|pass_stmt|flow_stmt|import_stmt|global_stmt|exec_stmt|assert_stmt);
expr_stmt = testlist >> (augassign >> (yield_expr|testlist)|*(lit("=") >> (yield_expr|testlist)));
augassign = (lit("+=")|lit("-=")|lit("*=")|lit("/=")|lit("%=")|lit("&=")|lit("|=")|lit("^=")|lit("<<=")|lit(">>=")|lit("**=")|lit("//="));
print_stmt = lit("print") >> ( -( test >> *(lit(",") >> test) >> -(lit(",")) )|lit(">>") >> test >> -( +(lit(",") >> test) >> -(lit(",")) ) );
del_stmt = lit("del") >> exprlist;
pass_stmt = lit("pass");
flow_stmt = break_stmt|continue_stmt|return_stmt|raise_stmt|yield_stmt;
break_stmt = lit("break");
continue_stmt = lit("continue");
return_stmt = lit("return") >> -(testlist);
yield_stmt = yield_expr.alias();
raise_stmt = lit("raise") >> -(test >> -(lit(",") >> test >> -(lit(",") >> test)));
import_stmt = import_name|import_from;
import_name = lit("import") >> dotted_as_names;
import_from = (lit("from") >> (*lit(".") >> dotted_name|+lit(".")) >> lit("import") >> (lit("*")|lit("(") >> import_as_names >> lit(")")|import_as_names));
import_as_name = NAME >> -(lit("as") >> NAME);
dotted_as_name = dotted_name >> -(lit("as") >> NAME);
import_as_names = import_as_name >> *(lit(",") >> import_as_name) >> -(lit(","));
dotted_as_names = dotted_as_name >> *(lit(",") >> dotted_as_name);
dotted_name = NAME >> *(lit(".") >> NAME);
global_stmt = lit("global") >> NAME >> *(lit(",") >> NAME);
exec_stmt = lit("exec") >> expr >> -(lit("in") >> test >> -(lit(",") >> test));
assert_stmt = lit("assert") >> test >> -(lit(",") >> test);
compound_stmt = if_stmt|while_stmt|for_stmt|try_stmt|with_stmt|funcdef|classdef|decorated;
if_stmt = lit("if") >> test >> lit(":") >> suite >> *(lit("elif") >> test >> lit(":") >> suite) >> -(lit("else") >> lit(":") >> suite);
while_stmt = lit("while") >> test >> lit(":") >> suite >> -(lit("else") >> lit(":") >> suite);
for_stmt = lit("for") >> exprlist >> lit("in") >> testlist >> lit(":") >> suite >> -(lit("else") >> lit(":") >> suite);
try_stmt = (lit("try") >> lit(":") >> suite >> (+(except_clause >> lit(":") >> suite) >> -(lit("else") >> lit(":") >> suite) >> -(lit("finally") >> lit(":") >> suite)|lit("finally") >> lit(":") >> suite));
with_stmt = lit("with") >> with_item >> *(lit(",") >> with_item) >> lit(":") >> suite;
with_item = test >> -(lit("as") >> expr);
except_clause = lit("except") >> -(test >> -((lit("as")|lit(",")) >> test));
suite = simple_stmt|NEWLINE >> INDENT >> +stmt >> DEDENT;
testlist_safe = old_test >> -(+(lit(",") >> old_test) >> -(lit(",")));
old_test = or_test|old_lambdef;
old_lambdef = lit("lambda") >> -(varargslist) >> lit(":") >> old_test;
test = or_test >> -(lit("if") >> or_test >> lit("else") >> test)|lambdef;
or_test = and_test >> *(lit("or") >> and_test);
and_test = not_test >> *(lit("and") >> not_test);
not_test = lit("not") >> not_test|comparison;
comparison = expr >> *(comp_op >> expr);
comp_op = lit("<")|lit(">")|lit("==")|lit(">=")|lit("<=")|lit("<>")|lit("!=")|lit("in")|lit("not in")|lit("is")|lit("is not");
expr = xor_expr >> *(lit("|") >> xor_expr);
xor_expr = and_expr >> *(lit("^") >> and_expr);
and_expr = shift_expr >> *(lit("&") >> shift_expr);
shift_expr = arith_expr >> *((lit("<<")|lit(">>")) >> arith_expr);
arith_expr = term >> *((lit("+")|lit("-")) >> term);
term = factor >> *((lit("*")|lit("/")|lit("%")|lit("//")) >> factor);
factor = (lit("+")|lit("-")|lit("~")) >> factor|power;
power = atom >> *trailer >> -(lit("**") >> factor);
atom = (lit("(") >> -(yield_expr|testlist_comp) >> lit(")")|lit("-(") >> -(listmaker) >> lit(")")|lit("{") >> -(dictorsetmaker) >> lit("}")|lit("`") >> testlist1 >> lit("`")|NAME|NUMBER|+STRING);
listmaker = test >> ( list_for|*(lit(",") >> test) >> -(lit(",")) );
testlist_comp = test >> ( comp_for|*(lit(",") >> test) >> -(lit(",")) );
lambdef = lit("lambda") >> -(varargslist) >> lit(":") >> test;
trailer = lit("(") >> -(arglist) >> lit(")")|lit("[") >> subscriptlist >> lit("]")|lit(".") >> NAME;
subscriptlist = subscript >> *(lit(",") >> subscript) >> -(lit(","));
subscript = lit(".") >> lit(".") >> lit(".")|test|-(test) >> lit(":") >> -(test) >> -(sliceop);
sliceop = lit(":") >> -(test);
exprlist = expr >> *(lit(",") >> expr) >> -(lit(","));
testlist = test >> *(lit(",") >> test) >> -(lit(","));
dictorsetmaker = ( (test >> lit(":") >> test >> (comp_for|*(lit(",") >> test >> lit(":") >> test) >> -(lit(","))))|(test >> (comp_for|*(lit(",") >> test) >> -(lit(",")))) );
classdef = lit("class") >> NAME >> -(lit("(") >> -(testlist) >> lit(")")) >> lit(":") >> suite;
arglist = *(argument >> lit(",")) >> (argument >> -(lit(","))|lit("*") >> test >> *(lit(",") >> argument) >> -(lit(",") >> lit("**") >> test)|lit("**") >> test);
argument = test >> -(comp_for)|test >> lit("=") >> test;
list_iter = list_for|list_if;
list_for = lit("for") >> exprlist >> lit("in") >> testlist_safe >> -(list_iter);
list_if = lit("if") >> old_test >> -(list_iter);
comp_iter = comp_for|comp_if;
comp_for = lit("for") >> exprlist >> lit("in") >> or_test >> -(comp_iter);
comp_if = lit("if") >> old_test >> -(comp_iter);
testlist1 = test >> *(lit(",") >> test);
encoding_decl = NAME.alias();
yield_expr = lit("yield") >> -(testlist);
}
// LEXEMS
qi::rule<Iterator, space_type> NEWLINE;
qi::rule<Iterator, space_type> INDENT;
qi::rule<Iterator, space_type> DEDENT;
qi::rule<Iterator, space_type> ENDMARKER;
qi::rule<Iterator, space_type> NAME;
qi::rule<Iterator, space_type> NUMBER;
qi::rule<Iterator, space_type> STRING;
// IDENTIFIER
qi::rule<Iterator, space_type> identifier;
// STRING LITERAL
qi::rule<Iterator, space_type> stringliteral;
qi::rule<Iterator, space_type> stringprefix;
qi::rule<Iterator, space_type> shortstring;
qi::rule<Iterator, space_type> longstring;
qi::rule<Iterator, space_type> shortstringitem;
qi::rule<Iterator, space_type> longstringitem;
qi::rule<Iterator, space_type> shortstringchar;
qi::rule<Iterator, space_type> longstringchar;
qi::rule<Iterator, space_type> escapeseq;
// INTEGER LITERAL
qi::rule<Iterator, space_type> longinteger;
qi::rule<Iterator, space_type> integer;
qi::rule<Iterator, space_type> decimalinteger;
qi::rule<Iterator, space_type> octinteger;
qi::rule<Iterator, space_type> hexinteger;
qi::rule<Iterator, space_type> bininteger;
qi::rule<Iterator, space_type> nonzerodigit;
qi::rule<Iterator, space_type> octdigit;
qi::rule<Iterator, space_type> bindigit;
qi::rule<Iterator, space_type> hexdigit;
// FLOAT LITERAL
qi::rule<Iterator, space_type> floatnumber;
qi::rule<Iterator, space_type> pointfloat;
qi::rule<Iterator, space_type> exponentfloat;
qi::rule<Iterator, space_type> intpart;
qi::rule<Iterator, space_type> fraction;
qi::rule<Iterator, space_type> exponent;
//IMAGINARY LITERAL
qi::rule<Iterator, space_type> imagnumber;
// PYTHON GRAMMAR
qi::rule<Iterator, space_type> single_input;
qi::rule<Iterator, space_type> file_input;
qi::rule<Iterator, space_type> eval_input;
qi::rule<Iterator, space_type> decorator;
qi::rule<Iterator, space_type> decorators;
qi::rule<Iterator, space_type> decorated;
qi::rule<Iterator, space_type> funcdef;
qi::rule<Iterator, space_type> parameters;
qi::rule<Iterator, space_type> varargslist;
qi::rule<Iterator, space_type> fpdef;
qi::rule<Iterator, space_type> fplist;
qi::rule<Iterator, space_type> stmt;
qi::rule<Iterator, space_type> simple_stmt;
qi::rule<Iterator, space_type> small_stmt;
qi::rule<Iterator, space_type> expr_stmt;
qi::rule<Iterator, space_type> augassign;
qi::rule<Iterator, space_type> print_stmt;
qi::rule<Iterator, space_type> del_stmt;
qi::rule<Iterator, space_type> pass_stmt;
qi::rule<Iterator, space_type> flow_stmt;
qi::rule<Iterator, space_type> break_stmt;
qi::rule<Iterator, space_type> continue_stmt;
qi::rule<Iterator, space_type> return_stmt;
qi::rule<Iterator, space_type> yield_stmt;
qi::rule<Iterator, space_type> raise_stmt;
qi::rule<Iterator, space_type> import_stmt;
qi::rule<Iterator, space_type> import_name;
qi::rule<Iterator, space_type> import_from;
qi::rule<Iterator, space_type> import_as_name;
qi::rule<Iterator, space_type> dotted_as_name;
qi::rule<Iterator, space_type> import_as_names;
qi::rule<Iterator, space_type> dotted_as_names;
qi::rule<Iterator, space_type> dotted_name;
qi::rule<Iterator, space_type> global_stmt;
qi::rule<Iterator, space_type> exec_stmt;
qi::rule<Iterator, space_type> assert_stmt;
qi::rule<Iterator, space_type> compound_stmt;
qi::rule<Iterator, space_type> if_stmt;
qi::rule<Iterator, space_type> while_stmt;
qi::rule<Iterator, space_type> for_stmt;
qi::rule<Iterator, space_type> try_stmt;
qi::rule<Iterator, space_type> with_stmt;
qi::rule<Iterator, space_type> with_item;
qi::rule<Iterator, space_type> except_clause;
qi::rule<Iterator, space_type> suite;
qi::rule<Iterator, space_type> testlist_safe;
qi::rule<Iterator, space_type> old_test;
qi::rule<Iterator, space_type> old_lambdef;
qi::rule<Iterator, space_type> test;
qi::rule<Iterator, space_type> or_test;
qi::rule<Iterator, space_type> and_test;
qi::rule<Iterator, space_type> not_test;
qi::rule<Iterator, space_type> comparison;
qi::rule<Iterator, space_type> comp_op;
qi::rule<Iterator, space_type> expr;
qi::rule<Iterator, space_type> xor_expr;
qi::rule<Iterator, space_type> and_expr;
qi::rule<Iterator, space_type> shift_expr;
qi::rule<Iterator, space_type> arith_expr;
qi::rule<Iterator, space_type> term;
qi::rule<Iterator, space_type> factor;
qi::rule<Iterator, space_type> power;
qi::rule<Iterator, space_type> atom;
qi::rule<Iterator, space_type> listmaker;
qi::rule<Iterator, space_type> testlist_comp;
qi::rule<Iterator, space_type> lambdef;
qi::rule<Iterator, space_type> trailer;
qi::rule<Iterator, space_type> subscriptlist;
qi::rule<Iterator, space_type> subscript;
qi::rule<Iterator, space_type> sliceop;
qi::rule<Iterator, space_type> exprlist;
qi::rule<Iterator, space_type> testlist;
qi::rule<Iterator, space_type> dictorsetmaker;
qi::rule<Iterator, space_type> classdef;
qi::rule<Iterator, space_type> arglist;
qi::rule<Iterator, space_type> argument;
qi::rule<Iterator, space_type> list_iter;
qi::rule<Iterator, space_type> list_for;
qi::rule<Iterator, space_type> list_if;
qi::rule<Iterator, space_type> comp_iter;
qi::rule<Iterator, space_type> comp_for;
qi::rule<Iterator, space_type> comp_if;
qi::rule<Iterator, space_type> testlist1;
qi::rule<Iterator, space_type> encoding_decl;
qi::rule<Iterator, space_type> yield_expr;
};
问题在于,当我尝试解析简单文件时:
pass
经过一些词法模块后,女巫是:
pass <NEWLINE> <EOT>
解析失败并在第一个字符处停止。 当我尝试使用 pass_stmt 规则解析此文件时,一切正常(除了我们仍然拥有并保留,但传递单词被“消耗”)。 当我尝试用规则一级解析它 - small_stmt - 解析器停在
> <EOT>
消耗
pass <NEWLINE
一级 - simple_stmt 与 file_input 的结果相同 - 解析器在第一个字符处停止。
在添加PYTHON GRAMMAR部分中定义的语法之前(从http://docs.python.org/reference/grammar.html获取)一切正常。解析器识别标识符,文字,数字等。
有人知道这里有什么不对吗?