OCaml:在递归函数中为AST类型构建列表

时间:2018-11-28 03:02:19

标签: parsing ocaml recursive-descent

我有一个手写的LL1解析器。我的AST并没有尽可能简化。语句部分如下所示:

(* Optional expression carried by a return statement: either an assignment
   expression (StmtExpression) or nothing at all (OptNil). *)
type stmt_opt = StmtExpression of assignment | OptNil
[@@deriving show]
(*stmt_list -> stmt stmt_list | ε *)
(* Statement list encoded as a hand-rolled cons list that mirrors the grammar
   rule above: StmtList (s, rest) is one statement followed by the remaining
   list, and StmtlistNil is the empty (epsilon) case. *)
type stmtlist =
    | StmtList of stmt * stmtlist
    | StmtlistNil 
[@@deriving show]

(* A single statement; the constructors follow the alternatives of the stmt
   grammar rule:
     Assignment  - assignment SEMI
     Return      - RETURN stmt_opt SEMI
     Parentheses - LBRACE stmt_list RBRACE (a brace-delimited statement list)
     If          - IF LPAREN assignment RPAREN stmt
     For         - FOR LPAREN assignment SEMI assignment SEMI assignment RPAREN stmt
     While       - WHILE LPAREN assignment RPAREN stmt *)
and stmt = 
| Assignment of assignment
| Return of stmt_opt 
| Parentheses of stmtlist
| If of assignment * stmt
| For of assignment * assignment * assignment * stmt
| While of assignment * stmt
(*“lparen” formals_opt “rparen” “LBRACE” vdecl_list stmt_list “RBRACE”*)
[@@deriving show]

如您所见,我仍然保留了许多不必要的信息。我想像这样构建我的语句类型:

(* Simplified statement AST that the parser is meant to produce: a block holds
   an ordinary OCaml list of statements instead of a dedicated list type, and
   the statement forms carry expr values directly. *)
type stmt =
    Block of stmt list
  | Expr of expr
  | Return of expr
  | If of expr * stmt * stmt
  | For of expr * expr * expr * stmt
  | While of expr * stmt

这样做让我有点迷茫,因为我完全是照着书本来构建我的LL1解析器的(我相信书中并没有考虑很长的语法):每个非终结符都有一个解析方法,每个解析方法都返回一个(tokenlist, ast)二元组。

我认为,为了像我的目标语句AST中那样构建Block类型,我需要在递归parseStmt方法中构建一个语句列表。我已将解析器代码简化为仅调用parseStmtList的解析器方法以及它们调用parseStmtList的特定实例

(*stmt_list = stmt stmt_list | epsilon*)
(* Parses a statement list. [tokenlist] is the current parser position and
   [lst] is meant to accumulate the statements parsed so far. A RightBrace
   ends the list and is left unconsumed, so the caller can check for it.
   Returns a pair of (remaining tokens, AST node). *)
let rec parseStmtList tokenlist lst = 
    match tokenlist.head with 
    | Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
    | _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in 
          (* NOTE(review): the cons operator takes the element on its left and
             the list on its right; here the operands are the other way round. *)
          let new_lst = lst::stmt in
          (* NOTE(review): this applies tokenlist_stmt to new_lst as if it were
             a function and pipes the result into parseStmtList; it is the line
             the reported type error points at. *)
          let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
          (* NOTE(review): stmt_lst is not bound in this scope; the name bound
             on the previous line is stmt_list. This branch also builds an
             Ast.Block while the RightBrace branch returns Ast.StmtlistNil. *)
          (tokenlist_stmt_list, Ast.Block(stmt_lst))

(*stmt -> assignment SEMI 
|  RETURN stmt_opt SEMI
|  LBRACE stmt_list RBRACE 
|  IF LPAREN assignment RPAREN stmt 
|  FOR LPAREN assignment SEMI assignment SEMI assignment RPAREN stmt  
|  WHILE LPAREN assignment RPAREN stmt
*)

(* Parses a single statement and returns (remaining tokens, statement AST).
   Only two of the alternatives listed above are handled in this excerpt: an
   assignment that starts with an identifier, and a brace-delimited statement
   list. Raises Syntax_error when the expected terminator is missing or when
   the first token starts neither form. *)
and parseStmt tokenlist = 
   begin
   match tokenlist.head with 
   (* assignment SEMI: parse the assignment, then require and consume the
      semicolon. *)
   | Lexer.ID identifier -> let (tokenlist_assignment, assignment) = parseAssignment tokenlist in
                begin
                match tokenlist_assignment.head with
                | Lexer.Semicolon -> (next tokenlist_assignment, Ast.Assignment(assignment))
                (* The message prints the token list as it was at the start of
                   the statement, not at the point of failure. *)
                | _-> let err_msg = __LOC__ ^ "Syntax Error semicolon expected but received" ^ show_token_list tokenlist in
                     raise (Syntax_error err_msg) 
                end
         (* LBRACE stmt_list RBRACE: skip the opening brace, parse the inner
            list starting from an empty accumulator, then require and consume
            the closing brace. *)
         | Lexer.LeftBrace -> let tokenlist_leftbrace = next tokenlist in 
                        let (tokenlist_expr, expr) = parseStmtList tokenlist_leftbrace [] in
                        begin
                        match tokenlist_expr.head with
                        | Lexer.RightBrace -> (next tokenlist_expr, Ast.Parentheses(expr))
                        | _-> let err_msg = __LOC__ ^ "Syntax Error right brace expected but received" ^ show_token_list tokenlist in
                              raise (Syntax_error err_msg)
                        end
   (* Any other leading token is rejected. *)
   | _-> let err_msg = __LOC__ ^ "Syntax Error left brace expected but received" ^ show_token_list tokenlist in
                    raise (Syntax_error err_msg)
    end

但是,我遇到了错误:

Error: This expression has type 'a -> token_list * Ast.stmtlist
       but an expression was expected of type 'b * 'c

该错误出现在 let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in 这一行中的 parseStmtList 处。

1 个答案:

答案 0 :(得分:1)

tokenlist_stmt new_lst |> parseStmtList

这里,您是在将tokenlist_stmt应用于参数new_lst,然后将parseStmtList应用于其结果。但是tokenlist_stmt实际上并不是函数,因此这是一个类型错误。

大概您是想以tokenlist_stmt和new_lst作为两个参数来调用parseStmtList。语法很简单:

parseStmtList tokenlist_stmt new_lst

另外,lst::stmt也是类型错误,原因有两个:

  1. ::将列表作为其右侧操作数,而不是左侧,因此应为stmt::lst
  2. lst实际上不是列表,而是Ast.Block,因为这是parseStmtList返回的内容。

修复所有这些问题后,您会注意到列表的顺序是反的(这大概就是您一开始尝试lst::stmt的原因,但是您无法像这样在列表末尾追加元素)。在使用累加器构建列表时,这是一个常见问题。解决方案是要么在列表构建完成后将其反转,要么从一开始就不使用累加器。


需要指出的一件事是,在使用Ast.stmtlist时,所有这些问题也将适用。也就是说,如果您的代码如下所示:

let new_lst = Ast.StmtList(lst, stmt) in
let (tokenlist_stmt_list, stmt_list) = tokenlist_stmt new_lst |> parseStmtList in
(tokenlist_stmt_list, Ast.Block(stmt_lst))

然后您将得到完全相同的错误。这使我认为,您对代码的改动远远超出了必要的范围。由于您的旧代码大概是可以正常工作的,因此我假设它看起来像这样:

(* Parses stmt_list -> stmt stmt_list | epsilon without an accumulator.
   Returns the remaining tokens paired with the parsed Ast.stmtlist. A
   RightBrace ends the list and is left unconsumed, so the caller can check
   for it. *)
let rec parseStmtList tokenlist = 
    match tokenlist.head with 
    | Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )
    | _ -> let (tokenlist_stmt, stmt) = parseStmt tokenlist in 
          let (tokenlist_stmt_list, stmt_list) = parseStmtList tokenlist_stmt in
          (* Use the tail bound on the previous line; the original referred to
             an unbound name here. *)
          (tokenlist_stmt_list, Ast.StmtList (stmt, stmt_list))

然后在parseStmt中,您拥有:

let (tokenlist_stmtlist, stmtlist) = parseStmtList tokenlist_leftbrace in
begin
  match tokenlist_stmtlist.head with
  | Lexer.RightBrace -> (next tokenlist_stmtlist, Ast.Block(stmtlist))

现在,去掉Ast.stmtlist之后,您需要更改的只是实际用到其构造函数的部分,把这些部分替换为列表构造函数(::和[])。因此,parseStmt中的代码将完全保持不变,parseStmtList中的唯一更改应该是替换行

| Lexer.RightBrace -> (tokenlist, Ast.StmtlistNil )

使用

| Lexer.RightBrace -> (tokenlist, [] )

和行

(tokenlist_stmt_list, Ast.StmtList (stmt, stmt_list))

使用

(tokenlist_stmt_list, stmt :: stmt_list)

如果您的旧代码看起来与上面的代码有所不同,则可能需要更改不同的行,但思路仍然相同:将Ast.StmtList替换为::,将Ast.StmtlistNil替换为[]。

就是这样。这就是所有必要的更改。您把问题想得太复杂了。