我使用menhir来定义ML中现代编译器实现中描述的Tiger语言,here是手册:
exp:
| lv = lvalue { Lvalue lv }
| i = INT { Int i }
| s = STRING { String s }
......
lvalue:
| i = ID { Id i }
| lv = lvalue DOT i = ID
{ Field_exp (lv, i) }
| lv = lvalue LBRACKET e = exp RBRACKET
{ Subscript (lv, e) }
但是,此语法无法识别a[3]
之类的字符串,但会识别a.b
。
然后我向lvalue添加另一条规则
| i = ID LBRACKET e = exp RBRACKET
{ Subscript (Id i, e) }
现在我的语法识别a[3]
,但它无法识别a[r]
这对我来说似乎很奇怪。为什么我的原始语法不能识别a[3]
,为什么不能识别a[r]
?
更新
这是我的语法:
%{
open Lexing
open Ast
%}
%token <int> INT
%token <string> ID
%token <string> STRING
%token ARRAY BREAK DO ELSE END FOR
%token FUNCTION IF IN LET NIL OF
%token THEN TO TYPE VAR WHILE
%token LPAREN RPAREN LBRACE RBRACE
%token LBRACKET RBRACKET
%token COLON SEMICOL COLONEQ
%token DOT COMMA
%token PLUS MINUS TIMES DIVIDE
%token EQ NEQ GT GE
%token LT LE AMPERSAND PIPE
%token EOF
%nonassoc OF
%nonassoc DO
%nonassoc THEN
%nonassoc ELSE
%nonassoc COLONEQ
%left PIPE
%left AMPERSAND
%nonassoc EQ NEQ LT LE GT GE
%left PLUS MINUS
%left TIMES DIVIDE
%nonassoc UMINUS
%start <Ast.exp> prog
%%
prog:
| e = exp EOF { e }
;
exp:
| NIL { Nil }
| BREAK { Break }
| i = INT { Int i }
| s = STRING { String s }
| lv = lvalue { Lvalue lv }
| LPAREN es = exp_seq RPAREN
{ match es with
| [e] -> e
| es -> Exp_seq es
}
| MINUS e = exp %prec UMINUS
{ Negation_exp e }
| i = ID LPAREN es = exp_list RPAREN (* function call *)
{ Call_exp (i, es) }
| i = ID LBRACE fc = field_create_list RBRACE
{ Rec_create (i, fc) }
| i = ID LBRACKET e1 = exp RBRACKET OF e2 = exp
{ Arr_create (i, e1, e2) }
| lv = lvalue COLONEQ e = exp
{ Assignment (lv, e) }
| IF test = exp THEN then_exp = exp ELSE else_exp = exp
{ Ifthenelse (test, then_exp, else_exp) }
| IF test = exp THEN then_exp = exp
{ Ifthen (test, then_exp) }
| WHILE e1 = exp DO e2 = exp
{ Whileexp (e1, e2) }
| FOR i = ID COLONEQ e0 = exp TO e1 = exp DO e2 = exp
{ Forexp (i, e0, e1, e2) }
| LET decls = decl_list IN es = exp_seq END
{ Letexp (decls, es)}
| ae = arith_exp { ArithExp ae }
| ce = cmp_exp { CmpExp ce }
| be = bool_exp { BoolExp be }
lvalue:
| i = ID { Id i }
| lv = lvalue DOT i = ID
{ Field_exp (lv, i) }
| i = ID LBRACKET e = exp RBRACKET
{ Subscript (Id i, e) }
| lv = lvalue LBRACKET e = exp RBRACKET
{ Subscript (lv, e) }
exp_seq:
| es = separated_list(SEMICOL, exp) { es }
exp_list:
| el = separated_list(COMMA, exp) { el }
field_create_list:
| fc = separated_list(COMMA, field_create) { fc }
field_create:
| i = ID EQ e = exp { (i, e) }
decl_list:
| dl = nonempty_list(decl) { dl }
decl:
| TYPE i = ID EQ t = ty { Type_decl (i, t) }
| VAR i = ID COLONEQ e = exp { Var_decl (i, None, e) }
| VAR i = ID COLON tid = ID COLONEQ e = exp
{ Var_decl (i, Some tid, e) }
| FUNCTION i = ID LPAREN fd = field_decl_list RPAREN EQ e = exp
{ Func_decl (i, fd, None, e) }
| FUNCTION i = ID LPAREN fd = field_decl_list RPAREN COLON tid = ID EQ e = exp
{ Func_decl (i, fd, Some tid, e)}
ty:
| i = ID { Type_id i }
| ARRAY OF tid = ID { Array_ty tid }
| LBRACE fd = field_decl_list RBRACE
{ Rec_ty fd }
field_decl_list:
| fl = separated_list(COMMA, field_decl) { fl }
field_decl:
| i = ID COLON tid = ID { (i, tid) }
%inline arith_exp:
| e1=exp PLUS e2=exp { Add(e1, e2) }
| e1=exp MINUS e2=exp { Sub(e1, e2) }
| e1=exp TIMES e2=exp { Mul(e1, e2) }
| e1=exp DIVIDE e2=exp { Div(e1, e2) }
%inline cmp_exp:
| e1=exp EQ e2=exp { Eq(e1, e2) }
| e1=exp NEQ e2=exp { Neq(e1, e2) }
| e1=exp LT e2=exp { Lt(e1, e2) }
| e1=exp LE e2=exp { Le(e1, e2) }
| e1=exp GT e2=exp { Gt(e1, e2) }
| e1=exp GE e2=exp { Ge(e1, e2) }
%inline bool_exp:
| e1=exp AMPERSAND e2=exp { And(e1, e2) }
| e1=exp PIPE e2=exp { Or(e1, e2) }
词法:
{
open Lexing
open Parser
exception SyntaxError of string
let next_line lexbuf =
let pos = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <-
{ pos with pos_bol = lexbuf.lex_curr_pos;
pos_lnum = pos.pos_lnum + 1
}
}
let int = ['0' - '9'] ['0' - '9']*
let newline = '\n' | 'r' | "\r\n"
let whitespace = [' ' '\t']+
let id = ['a' - 'z' 'A' - 'Z' '_'] ['a' - 'z' 'A' - 'Z' '0' - '9' '_']*
rule read = parse
| whitespace { read lexbuf }
| newline { next_line lexbuf; read lexbuf }
| '"' { read_string (Buffer.create 17) lexbuf }
| "/*" { comment lexbuf; read lexbuf }
| eof { EOF }
| '(' { LPAREN }
| ')' { RPAREN }
| '{' { LBRACE }
| '}' { RBRACE }
| '[' { LBRACKET }
| ']' { RBRACKET }
| '+' { PLUS }
| '-' { MINUS }
| '*' { TIMES }
| '/' { DIVIDE }
| '=' { EQ }
| "<>" { NEQ }
| "<" { LT }
| "<=" { LE }
| '>' { GT }
| ">=" { GE }
| '&' { AMPERSAND }
| '|' { PIPE }
| ":=" { COLONEQ }
| ';' { SEMICOL }
| ':' { COLON }
| '.' { DOT }
| ',' { COMMA }
| "array" { ARRAY }
| "break" { BREAK }
| "do" { DO }
| "else" { ELSE }
| "end" { END }
| "for" { FOR }
| "function" { FUNCTION }
| "if" { IF }
| "in" { IN }
| "let" { LET }
| "nil" { NIL }
| "of" { OF }
| "then" { THEN }
| "to" { TO }
| "type" { TYPE }
| "var" { VAR }
| "while" { WHILE }
| int { INT (int_of_string (Lexing.lexeme lexbuf)) }
| id { ID (Lexing.lexeme lexbuf) }
and read_string buf =
parse
| '"' { STRING (Buffer.contents buf) }
| '\\' '/' { Buffer.add_char buf '/'; read_string buf lexbuf }
| '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf }
| '\\' 'b' { Buffer.add_char buf '\b'; read_string buf lexbuf }
| '\\' 'f' { Buffer.add_char buf '\012'; read_string buf lexbuf }
| '\\' 'n' { Buffer.add_char buf '\n'; read_string buf lexbuf }
| '\\' 'r' { Buffer.add_char buf '\r'; read_string buf lexbuf }
| '\\' 't' { Buffer.add_char buf '\t'; read_string buf lexbuf }
| [^ '"' '\\']+ { Buffer.add_string buf (Lexing.lexeme lexbuf);
read_string buf lexbuf }
| _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) }
| eof { raise (SyntaxError ("String is not terminated")) }
and comment = parse
| "*/" { () }
| "/*" { comment lexbuf; comment lexbuf }
| _
和ast.ml
type exp =
| Nil
| Break
| Int of int
| String of string
| Lvalue of lvalue
| Exp_seq of exp list
| Negation_exp of exp
| Call_exp of id * exp list
| Arr_create of type_id * exp * exp
| Rec_create of type_id * field_create list
| Assignment of lvalue * exp
| Ifthenelse of exp * exp * exp
| Ifthen of exp * exp
| Whileexp of exp * exp
| Forexp of id * exp * exp * exp
| Letexp of decl list * exp list
| ArithExp of arith_exp
| BoolExp of bool_exp
| CmpExp of cmp_exp
and decl =
| Type_decl of type_id * ty
| Func_decl of id * (field_decl list) * return_ty option * exp
| Var_decl of id * (type_id option) * exp
and id = string
and type_id = string
and return_ty = type_id
and array_type = type_id
and field_decl = id * type_id
and field_create = id * exp
and ty =
| Type_id of type_id
| Array_ty of array_type
| Rec_ty of field_decl list
and lvalue =
| Id of id
| Subscript of lvalue * exp
| Field_exp of lvalue * id
and arith_exp =
| Add of exp * exp
| Sub of exp * exp
| Mul of exp * exp
| Div of exp * exp
and bool_exp =
| Or of exp * exp
| And of exp * exp
and cmp_exp =
| Eq of exp * exp
| Neq of exp * exp
| Lt of exp * exp
| Le of exp * exp
| Gt of exp * exp
| Ge of exp * exp
答案 0 :(得分:3)
你的语法不是LL(1),正如你编译时menhir所述:
Warning: one state has shift/reduce conflicts.
Warning: one shift/reduce conflict was arbitrarily resolved.
您应该始终解决此类问题。当您提供选项-d
时,menhir将生成一个解释冲突的foo.conflicts
文件。
在这种情况下,问题来自这一行:
| i = ID LBRACKET e1 = exp RBRACKET OF e2 = exp
通过查找1,无法区分a[r]
和a[r] of e
。我建议改变你的语法。