解析shell引用的字符串为execv兼容的参数向量

时间:2015-04-01 22:20:53

标签: shell ocaml argv

假设我有一个类似以下的字符串

echo "foo" "bar\"blub""baz" "'" "\"" foo\ bar "\\" '\'' """"       Lots" "of\ whitespace

现在我想通过调用Unix.execv把上面的字符串作为命令执行,效果就像把它输入到shell中一样。如果我没有弄错,shell会把上面的内容解析为以下ocaml列表:

["echo"; "foo"; "bar\"blubbaz"; "'"; "\""; "foo bar"; "\\"; "'"; ""; "Lots of whitespace"]

哪个库让我从原始字符串到解析列表?

最终我想将结果列表交给Unix.execvpe。还有Unix.open_process_full能够使用/bin/sh处理我的原始字符串,但我发现在没有/bin/sh的情况下直接调用外部程序时,我的应用程序速度提高了16%。现在我希望能够接受更多输入字符串,包括引用和转义。

我是否必须自己编写解析器?

POSIX提供了wordexp函数,但包装该函数并不能解决我的问题,因为wordexp做的事情比我想要的多(命令替换、通配符(glob)展开、波浪号(tilde)展开以及环境变量替换)。

我只希望解析引用和转义。

1 个答案:

答案 0 :(得分:0)

我用ocamllex写出了这个问题的一个解决方案。把它发布在这里,以防其他人也想做类似的事情。它应该很容易扩展,以支持目前尚未支持的更多转义字符和其他shell功能。

{
  (* Raised when a backslash is followed by a character the lexer does not
     accept as an escape; carries the offending two-character lexeme. *)
  exception UnknownShellEscape of string

  (* Raised when the lexer meets a character that no rule of the current
     state accepts; carries that character. *)
  exception UnmatchedChar of char

  (* Return a fresh buffer whose contents are exactly [str]. *)
  let buf_from_str str =
    let fresh = Buffer.create 16 in
    let () = Buffer.add_string fresh str in
    fresh
}

(* Characters that may appear bare inside a word: anything except quotes,
   backslash and whitespace.  Line breaks are excluded so that a trailing
   newline (e.g. from echo or from a text file) is not glued onto the last
   word of the command. *)
let safechars = [^ '"' ''' '\\' ' ' '\t' '\n' '\r']+
(* Word separators: blanks and line breaks. *)
let space = [ ' ' '\t' '\n' '\r' ]+

(* Entry point: lexer state between words.
   argv: words completed so far, most recent first.
   Returns the words in input order once the end of input is reached.
   Raises UnknownShellEscape for a backslash followed by an unsupported
   character and UnmatchedChar for a character no other rule accepts. *)
rule shell_command argv = parse
 | eof
     { List.rev argv }
 | space
     { shell_command argv lexbuf }
 | safechars as word
     { uquote argv (buf_from_str word) lexbuf }
 | '\\' (['"' ''' '\\' ' '] as escaped)
     { (* a supported escape starts a new word holding the escaped character *)
       uquote argv (buf_from_str (String.make 1 escaped)) lexbuf }
 | '\\' _ as bad
     { raise (UnknownShellEscape bad) }
 | '"'
     { dquote argv (Buffer.create 16) lexbuf }
 | '''
     { squote argv (Buffer.create 16) lexbuf }
 | _ as stray
     { raise (UnmatchedChar stray) }
(* Lexer state inside the unquoted part of a word.
   argv: words completed so far, most recent first.
   buf:  text of the word being built.
   Whitespace or end of input finishes the word; a quote character switches
   to the matching quoted state while keeping the same buffer. *)
and uquote argv buf = parse
 | (space | eof)
     { shell_command (Buffer.contents buf :: argv) lexbuf }
 | safechars as chunk
     { Buffer.add_string buf chunk; uquote argv buf lexbuf }
 | '\\' (['"' ''' '\\' ' '] as escaped)
     { Buffer.add_char buf escaped; uquote argv buf lexbuf }
 | '\\' _ as bad
     { raise (UnknownShellEscape bad) }
 | '"'
     { dquote argv buf lexbuf }
 | '''
     { squote argv buf lexbuf }
 | _ as stray
     { raise (UnmatchedChar stray) }
(* Lexer state inside a double-quoted section of the current word.
   argv: words completed so far, most recent first.
   buf:  text of the word being built.
   Raises UnknownShellEscape for a backslash followed by anything other than
   a double quote or a backslash, and UnmatchedChar for a lone trailing
   backslash or for input that ends before the closing double quote. *)
and dquote argv buf = parse
 | '"' (space|eof) { shell_command ((Buffer.contents buf)::argv) lexbuf }
 | '"' '"'         { dquote argv buf lexbuf }
 | '"' '''         { squote argv buf lexbuf }
 | '"'             { uquote argv buf lexbuf }
 | '\\' '"'        { Buffer.add_string buf "\""; dquote argv buf lexbuf }
 | '\\' '\\'       { Buffer.add_string buf "\\"; dquote argv buf lexbuf }
 | '\\' _ as c     { raise (UnknownShellEscape c) }
 | [^ '"' '\\' ]+  { Buffer.add_string buf (Lexing.lexeme lexbuf); dquote argv buf lexbuf }
 | _ as c          { raise (UnmatchedChar c) }
 (* Unterminated quote: report it with the lexer's own exception instead of
    letting the generated code fail with a generic empty-token Failure. *)
 | eof             { raise (UnmatchedChar '"') }
(* Lexer state inside a single-quoted section of the current word.
   argv: words completed so far, most recent first.
   buf:  text of the word being built.
   Everything up to the closing single quote is copied verbatim; no escapes
   are recognised here.
   Raises UnmatchedChar when the input ends before the closing single quote. *)
and squote argv buf = parse
 | ''' (space|eof) { shell_command ((Buffer.contents buf)::argv) lexbuf }
 | ''' '''         { squote argv buf lexbuf }
 | ''' '"'         { dquote argv buf lexbuf }
 | '''             { uquote argv buf lexbuf }
 | [^ ''' ]+       { Buffer.add_string buf (Lexing.lexeme lexbuf); squote argv buf lexbuf }
 (* The rules above already accept every character, so the only remaining
    case is end of input inside the quote: report it as unterminated. *)
 | eof             { raise (UnmatchedChar '\'') }

{
  (* Lex the file named by the first command-line argument, or standard
     input when no argument is given, and print every parsed word on its
     own line. *)
  let main () =
    let source =
      match Array.length Sys.argv with
      | n when n > 1 -> open_in Sys.argv.(1)
      | _ -> stdin
    in
    let words = shell_command [] (Lexing.from_channel source) in
    List.iter (fun word -> Printf.printf "%s\n" word) words

  (* Run main, printing any uncaught exception before it propagates. *)
  let _ = Printexc.print main ()
}

尝试运行:

$ ocamllex test.mll
$ echo 'echo "foo" "bar\\"blub""baz" "'\''" "\\"" foo\\ bar '\
> '"\\\\" """"'\'''\'''\'''\''""       Lots" "of\\ whitespace' \
> | ocaml test.ml
echo
foo
bar"blubbaz
'
"
foo bar
\

Lots of whitespace

成功! \o/