dypgen中的模式匹配

时间:2016-06-13 11:46:17

标签: parsing ocaml lexical-analysis ambiguity

我想在 dypgen 中处理一些歧义。我在手册中找到了一些内容,想知道该如何使用它。手册 5.2 节“符号上的模式匹配”中有一个例子:

expr:
| expr OP<"+"> expr { $1 + $2 }
| expr OP<"*"> expr { $1 * $2 }

据我理解,OP 与 "+" 或 "*" 相匹配。我还找到了这段话:

  

模式可以是任何 Caml 模式(但不能带关键字 when)。例如,可以这样写:

expr: expr<(Function([arg1;arg2],f_body)) as f> expr
{ some action }

于是我尝试在其中放入其他一些表达式,但我不明白发生了什么。如果我放入 printf,它会输出所匹配字符串的值。但如果我放入 (fun x -> printf x)——在我看来这与 printf 等价——dypgen 会报告语法错误,并指向该表达式的末尾。如果我放入 Printf.printf,它会报告 Syntax error: operator expected。如果我放入 (fun x -> Printf.printf x),它会报告:Lexing failed with message: lexing: empty token。这些不同的错误信息分别是什么意思?

最后,我想在哈希表中查找该值是否存在,但我不知道这是否可行。这可能吗?

编辑:源自dypgen-demos的森林示例的最小示例。

语法文件forest_parser.dyp包含:

{
(* Brings the parse-tree constructors used by the actions below
   (Word, Noun, Sg, Pl) into scope. *)
open Parse_tree
(* Rebinds dypgen's merge function to Dyp.keep_all. Presumably this keeps
   every parse of an ambiguous input instead of a single one -- confirm
   against the dypgen manual. *)
let dyp_merge = Dyp.keep_all
}

%start main
%layout [' ' '\t']

%%

main : np "." "\n" { $1 }

np:
  |    sg                   {Noun($1)}
  |    pl                   {Noun($1)}

/* Each rule accepts a word only when its value matches the Caml pattern
   written between angle brackets. The sheep/fish pattern appears under both
   sg and pl, so those two words can be read as singular or as plural. */
sg: word    <Word("sheep"|"fish")>  {Sg($1)}
sg: word    <Word("cat"|"dog")>  {Sg($1)}
pl: word    <Word("sheep"|"fish")>  {Pl($1)}
pl: word    <Word("cats"|"dogs")>  {Pl($1)}

/* OR try:
    sg: word    <printf>  {Sg($1)}
    pl: word    <printf>  {Pl($1)}
*/

word: 
  | (['A'-'Z' 'a'-'z']+)    {Word($1)}

现在,forest.ml具有以下print_forest函数:

(* [print_forest forest] writes every tree of [forest] to standard output, one
   tree per line, followed by one extra blank line.  A [Word] is printed as its
   text; every other node is printed as a label, an opening bracket, the
   rendering of its child, and a closing " ]". *)
let print_forest forest =
  let rec show = function
    | Word text -> print_string text
    | Noun child -> bracket "N [" child
    | Sg child -> bracket "Sg [" child
    | Pl child -> bracket "Pl [" child
  (* Shared layout of the three wrapper nodes: opening label, child, " ]". *)
  and bracket opening child =
    print_string opening;
    show child;
    print_string " ]"
  in
  List.iter
    (fun tree ->
      show tree;
      print_newline ())
    forest;
  print_newline ()

parse_tree.mli 包含:

(** Parse tree produced by the grammar actions and rendered by
    [print_forest]. *)
type tree = 
  | Word        of string  (** a leaf carrying the text of the word *)
  | Noun        of tree    (** wraps one subtree; printed with the label N *)
  | Sg          of tree    (** wraps one subtree; printed with the label Sg *)
  | Pl          of tree    (** wraps one subtree; printed with the label Pl *)

然后你可以确定鱼,羊,猫等是什么。

sheep or fish can be singular and plural. cats and dogs cannot.

fish.
N [Sg [fish ] ]
N [Pl [fish ] ]

1 个答案:

答案 0 :(得分:1)

我之前对 dypgen 一无所知,所以自己动手研究了一下。

让我们看看我发现了什么。

parser.dyp 文件中,您可以定义词法分析器和解析器,也可以使用外部词法分析器。这是我做的:

我的看起来像这样:

**parse_prog.mli**

(** One parsed call statement. *)
type f = 
  | Print of string
      (** A call recognised as a printf call; carries the string chosen by the
          grammar action. *)
  | Function of string list * string * string
      (** Any other call: the module path (empty when unqualified), the
          function name, and the string argument. *)

(** A parsed program: the list of its call statements. *)
type program = f list

**prog_parser.dyp**

{
  (* Brings the Print and Function constructors used by the grammar actions
     into scope. *)
  open Parse_prog

  (* let dyp_merge = Dyp.keep_all *)    

  (* Scratch buffer for string literals: the string lexer rule appends the
     characters it reads to it, and the STRING token action clears it before
     lexing and reads its contents afterwards. *)
  let string_buf = Buffer.create 10
}

%start main

%relation pf<pr

%lexer

let newline = '\n'
let space = [' ' '\t' '\r']
let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*

rule string = parse
  | '"' { () }
  | _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
      string lexbuf }

main lexer =
  newline | space + -> { () }
  "fun"  -> ANONYMFUNCTION { () }
  lident -> FUNCTION { Dyp.lexeme lexbuf }
  uident -> MODULE { Dyp.lexeme lexbuf }
  '"' -> STRING { Buffer.clear string_buf;
                  string lexbuf;
                  Buffer.contents string_buf }

%parser

main : function_calls eof                                          
   { $1 }

function_calls:
  |                                                                
    { [] }
  | function_call ";" function_calls                               
    { $1 :: $3 }

/* A single call statement. Alternatives tagged pr build Print, alternatives
   tagged pf build Function. A plain printf call matches both the first and
   the fourth alternative; the pr and pf priorities, ordered by the relation
   declared at the top of the grammar, are presumably what makes the parser
   keep the Print reading (confirm against the dypgen manual).
   NOTE(review): the anonymous-function Print alternative returns its sixth
   symbol (the name of the lambda parameter), whereas the matching Function
   alternative returns its eighth symbol (the string literal). Confirm which
   one is intended. */
function_call:
  | printf STRING                                                  
    { Print $2 } pr
  | "(" ANONYMFUNCTION lident "->" printf lident ")" STRING        
    { Print $6 } pf
  | nested_modules "." FUNCTION STRING                             
    { Function ($1, $3, $4) } pf
  | FUNCTION STRING                                                
    { Function ([], $1, $2) } pf
  | "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING      
    { Function ([], $5, $8) } pf

printf:
  | FUNCTION<"printf">                                             
    { () }
  | MODULE<"Printf"> "." FUNCTION<"printf">                        
    { () }

nested_modules:
  | MODULE                                       
    { [$1] }
  | MODULE "." nested_modules                    
    { $1 :: $3 }

这个文件是最重要的。如你所见,如果有一个函数调用 printf "Test",我的语法就是有歧义的:它既可以归约为 Print "Test",也可以归约为 Function ([], "printf", "Test")。不过我发现可以给语法规则指定优先级,这样优先级较高的规则会在解析时被优先选中。(试着取消 let dyp_merge = Dyp.keep_all 的注释,你就会看到所有可能的组合。)

在我的主要内容中:

**main.ml**

open Parse_prog

(* [print_stlist fmt sl] writes each string of [sl] to the formatter [fmt],
   each one followed by a dot.  Nothing is written for the empty list. *)
let print_stlist fmt sl =
  List.iter (fun name -> Format.fprintf fmt "%s." name) sl

(* [print_program tl] prints every parse result contained in [tl].

   [tl] is a list of pairs whose first component is a list of [Print] /
   [Function] calls; the second component is ignored.  Each call is described
   on standard output, and a dashed separator line is written to standard
   error after each parse result. *)
let print_program tl =
  let print_call = function
    | Function (ml, f, p) ->
      Format.printf "I can't do anything with %a%s(\"%s\")@." print_stlist ml f p
    | Print s -> Format.printf "You want to print : %s@." s
  in
  (* Walk the parse results exactly once.  The previous version wrapped this
     loop in a helper that ignored its argument and iterated over the whole
     list again, so n parse results were each printed n times. *)
  List.iter
    (fun (calls, _) ->
      List.iter print_call calls;
      Format.eprintf "------------@.")
    tl

(* Path of the file to parse: the first command-line argument.  A missing
   argument yields a usage message instead of a bare index-out-of-bounds
   exception. *)
let input_file =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("usage: " ^ Sys.argv.(0) ^ " <input-file>");
    exit 2
  end else Sys.argv.(1)

(* Lexer buffer reading from [input_file].  The grammar lives in
   prog_parser.dyp, so the module to use is [Prog_parser]; the earlier
   references to Forest_parser and Parser_prog named modules that do not
   belong to this grammar.  [open_in] is used unqualified because the
   [Pervasives] module is deprecated and absent from recent OCaml releases. *)
let lexbuf = Dyp.from_channel (Prog_parser.pp ()) (open_in input_file)

(* All parse results returned by the grammar's [main] entry point. *)
let result = Prog_parser.main lexbuf

let () = print_program result

例如,对于以下文件:

**test**

printf "first print";
Printf.printf "nested print";
Format.eprintf "nothing possible";
(fun x -> printf x) "Anonymous print";

如果我执行./myexec test,我会收到以下提示

You want to print : first print
You want to print : nested print
I can't do anything with Format.eprintf("nothing possible")
You want to print : x
------------

所以,TL;DR:手册中的示例只是想告诉你,可以对已定义的记号(token)做模式匹配来得到新的规则(我从未定义记号 PRINT,只定义了 FUNCTION)。

我希望很清楚,我从你的问题中学到了很多东西; - )

[编辑] 我修改了解析器,以实现你想要的效果:

{
      open Parse_prog

      (* let dyp_merge = Dyp.keep_all *)

      let string_buf = Buffer.create 10
    }

    %start main

    %relation pf<pp

    %lexer

    let newline = '\n'
    let space = [' ' '\t' '\r']
    let uident = ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']*
    let lident = ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']*

    rule string = parse
      | '"' { () }
      | _ { Buffer.add_string string_buf (Dyp.lexeme lexbuf);
          string lexbuf }

    main lexer =
      newline | space + -> { () }
      "fun"  -> ANONYMFUNCTION { () }
      lident -> FUNCTION { Dyp.lexeme lexbuf }
      uident -> MODULE { Dyp.lexeme lexbuf }
      '"' -> STRING { Buffer.clear string_buf;
                      string lexbuf;
                      Buffer.contents string_buf }

    %parser

    main : function_calls eof                                          
       { $1 }

    /* Builds the list of calls. A call whose value matches the Function
       pattern of the second alternative (module path Printf or empty, function
       name printf, argument bound to st) is stored as Print st; the last
       alternative stores the call unchanged. The pp and pf tags are rule
       priorities, ordered by the relation declared at the top of the grammar. */
    function_calls:
      |                                                                
        { [] } pf
      | function_call <Function((["Printf"] | []), "printf", st)> ";" function_calls
        { (Print st) :: $3 } pp
      | function_call ";" function_calls                               
        { $1 :: $3 } pf


    function_call:
      | nested_modules "." FUNCTION STRING                          
        { Function ($1, $3, $4) }
      | FUNCTION STRING                             
        { Function ([], $1, $2) }
      | "(" ANONYMFUNCTION lident "->" FUNCTION lident ")" STRING
        { Function ([], $5, $8) }

    nested_modules:
      | MODULE                                       
        { [$1] }
      | MODULE "." nested_modules                    
        { $1 :: $3 }

如你所见,这里我不是在解析函数调用时判断它是否为打印,而是在把它放入函数调用列表时才判断。也就是说,我匹配的是解析器构建出的代数数据类型(algebraic type)。希望这个例子对你有用 ;-)(但要注意,这个语法的歧义非常多!:-D)