假设我有一个类似以下的字符串
echo "foo" "bar\"blub""baz" "'" "\"" foo\ bar "\\" '\'' """" Lots" "of\ whitespace
现在我想通过调用Unix.execv来执行上面的字符串,就像把它输入到shell中一样。
如果我没有弄错,shell会将上面的内容解析为以下OCaml列表:
["echo"; "foo"; "bar\"blubbaz"; "'"; "\""; "foo bar"; "\\"; "'"; ""; "Lots of whitespace"]
有哪个库可以把原始字符串转换成这样解析后的列表?
最终我想将结果列表交给 Unix.execvpe 。虽然 Unix.open_process_full 可以借助 /bin/sh 处理我的原始字符串,但我发现在不经过 /bin/sh 而直接调用外部程序时,我的应用程序速度提高了16%。现在我希望能够接受更多种类的输入字符串,包括带引号和转义的字符串。
我是否必须自己编写解析器?
POSIX提供了函数 wordexp ,但是包装该函数不能解决我的问题,因为 wordexp 做的事情比我想要的多(命令替换、glob通配符展开、波浪号(tilde)替换和环境变量替换)。
我只希望解析引号和转义。
答案 0 :(得分:0)
我使用ocamllex制定了解决这个问题的方法。将其发布在此处,以防其他人想要做类似的事情。它应该可以轻松扩展,以包含超出当前支持的转义字符和其他shell功能范围的功能。
{
  (* Raised when a backslash is followed by a character that none of the
     escape rules accept; carries the matched backslash sequence. *)
  exception UnknownShellEscape of string

  (* Raised by the catch-all lexer rules with the offending character. *)
  exception UnmatchedChar of char

  (* Return a fresh buffer whose initial contents are [str]. *)
  let buf_from_str str =
    let fresh = Buffer.create 16 in
    let () = Buffer.add_string fresh str in
    fresh
}
(* Characters that may appear verbatim in an unquoted word: anything except
   quotes, backslash and whitespace.  Newline is excluded so that the line
   terminator of the input (e.g. when reading from a file or a pipe) does not
   end up inside the last argument. *)
let safechars = [^ '"' ''' '\\' ' ' '\t' '\n']+
(* Unquoted whitespace that separates arguments. *)
let space = [ ' ' '\t' '\n' ]+
(* Entry point, used between arguments.  [argv] holds the arguments completed
   so far in reverse order; the final result is returned in input order.
   Each rule that starts a new argument hands over to the sub-lexer for the
   corresponding quoting state together with the buffer for that argument. *)
rule shell_command argv = parse
  | eof { List.rev argv }
  | space { shell_command argv lexbuf }
  | safechars as word { uquote argv (buf_from_str word) lexbuf }
  (* Supported escapes: the escaped character itself starts the argument. *)
  | '\\' (['"' ''' '\\' ' '] as ch)
      { uquote argv (buf_from_str (String.make 1 ch)) lexbuf }
  | '\\' _ as esc { raise (UnknownShellEscape esc) }
  | '"' { dquote argv (Buffer.create 16) lexbuf }
  | ''' { squote argv (Buffer.create 16) lexbuf }
  | _ as ch { raise (UnmatchedChar ch) }
(* Inside an unquoted part of an argument.  [buf] accumulates the text of the
   argument being built; whitespace or end of input finishes the argument and
   returns to [shell_command]. *)
and uquote argv buf = parse
  | (space | eof) { shell_command (Buffer.contents buf :: argv) lexbuf }
  (* Supported escapes contribute the escaped character itself. *)
  | '\\' (['"' ''' '\\' ' '] as ch)
      { Buffer.add_char buf ch; uquote argv buf lexbuf }
  | '\\' _ as esc { raise (UnknownShellEscape esc) }
  (* A quote switches state but keeps appending to the same argument. *)
  | '"' { dquote argv buf lexbuf }
  | ''' { squote argv buf lexbuf }
  | safechars as word { Buffer.add_string buf word; uquote argv buf lexbuf }
  | _ as ch { raise (UnmatchedChar ch) }
(* Inside a double-quoted part of an argument.  [buf] accumulates the text of
   the argument being built.  Only the escapes backslash-doublequote and
   backslash-backslash are accepted; any other backslash sequence raises
   [UnknownShellEscape].  Reaching the end of input before the closing quote
   raises [UnmatchedChar '"']. *)
and dquote argv buf = parse
  (* Closing quote followed by whitespace or end of input ends the argument. *)
  | '"' (space|eof) { shell_command ((Buffer.contents buf)::argv) lexbuf }
  (* Closing quote immediately followed by another opening quote: the same
     argument continues in the new quoting state. *)
  | '"' '"' { dquote argv buf lexbuf }
  | '"' ''' { squote argv buf lexbuf }
  | '"' { uquote argv buf lexbuf }
  | '\\' '"' { Buffer.add_string buf "\""; dquote argv buf lexbuf }
  | '\\' '\\' { Buffer.add_string buf "\\"; dquote argv buf lexbuf }
  | '\\' _ as c { raise (UnknownShellEscape c) }
  | [^ '"' '\\' ]+ { Buffer.add_string buf (Lexing.lexeme lexbuf); dquote argv buf lexbuf }
  (* Unterminated quote: report it explicitly instead of letting the lexer
     fail with a generic "empty token" error. *)
  | eof { raise (UnmatchedChar '"') }
  | _ as c { raise (UnmatchedChar c) }
(* Inside a single-quoted part of an argument.  [buf] accumulates the text of
   the argument being built.  No escapes are recognised: every character up to
   the closing quote is taken literally.  Reaching the end of input before the
   closing quote raises [UnmatchedChar '\'']. *)
and squote argv buf = parse
  (* Closing quote followed by whitespace or end of input ends the argument. *)
  | ''' (space|eof) { shell_command ((Buffer.contents buf)::argv) lexbuf }
  (* Closing quote immediately followed by another opening quote: the same
     argument continues in the new quoting state. *)
  | ''' ''' { squote argv buf lexbuf }
  | ''' '"' { dquote argv buf lexbuf }
  | ''' { uquote argv buf lexbuf }
  | [^ ''' ]+ { Buffer.add_string buf (Lexing.lexeme lexbuf); squote argv buf lexbuf }
  (* Unterminated quote: report it explicitly instead of letting the lexer
     fail with a generic "empty token" error. *)
  | eof { raise (UnmatchedChar '\'') }
  | _ as c { raise (UnmatchedChar c) }
{
  (* Lex one command line from the file named by the first program argument
     (or from stdin when no argument is given) and print each parsed argument
     on its own line. *)
  let main () =
    let from_file = Array.length Sys.argv > 1 in
    let cin = if from_file then open_in Sys.argv.(1) else stdin in
    (* Only close a channel we opened ourselves; stdin is left alone. *)
    let release () = if from_file then close_in_noerr cin in
    let argv =
      try shell_command [] (Lexing.from_channel cin)
      with e -> release (); raise e
    in
    release ();
    List.iter (Printf.printf "%s\n") argv

  (* Run [main]; any uncaught exception is printed before being re-raised. *)
  let _ = Printexc.print main ()
}
尝试运行:
$ ocamllex test.mll
$ echo 'echo "foo" "bar\\"blub""baz" "'\''" "\\"" foo\\ bar '\
> '"\\\\" """"'\'''\'''\'''\''"" Lots" "of\\ whitespace' \
> | ocaml test.ml
echo
foo
bar"blubbaz
'
"
foo bar
\

Lots of whitespace
成功! \ O /