《Split a string by commas but ignore commas within double-quotes using Javascript》一文给出了正则表达式 /(".*?"|[^",\s]+)(?=\s*,|\s*$)/。
如何在 Erlang 的 re:split() 中使用它?这个正则表达式直接搬到 Erlang 中并不起作用。
1> S = "20140419,\"Blah blah, foo foo\",1,0,0,0,1,2,0,0".
2> re:split(S, "(\".*?\"|[^\",\s]+,)(?=\s*,|\s*$)", [{return,list}]).
["20140419,","\"Blah blah, foo foo\"",",1,0,0,0,1,2,0,0"]
我期望得到的结果是如下列表:
["20140419","\"Blah blah, foo foo\"","1","0","0","0","1","2","0","0"]
感谢。
答案 0 :(得分:2)
只需将JavaScript正则表达式翻译为Erlang:
Erlang R16B03-1 (erts-5.10.4) [source] [64-bit] [smp:8:8] [async-threads:10] [kernel-poll:false]
Eshell V5.10.4 (abort with ^G)
1> S = "20140419,\"Blah blah, foo foo\",1,0,0,0,1,2,0,0".
"20140419,\"Blah blah, foo foo\",1,0,0,0,1,2,0,0"
2> {ok,R} = re:compile("(\".*?\"|[^\",\\s]+)(?=\\s*,|\\s*$)").
{ok,{re_pattern,1,0,
<<69,82,67,80,122,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,
...>>}}
3> {match,Matches} = re:run(S, R, [{capture,[1],list},global]).
{match,[["20140419"],
["\"Blah blah, foo foo\""],
["1"],
["0"],
["0"],
["0"],
["1"],
["2"],
["0"],
["0"]]}
4> [M || [M] <- Matches].
["20140419","\"Blah blah, foo foo\"","1","0","0","0","1",
"2","0","0"]
注意:在 shell 命令 2 中,模式里必须使用双反斜杠(\\s)才能正确指定 \s;在 Erlang 字符串中,单独的 \s 会被解释为一个空格字符。
答案 1 :(得分:1)
我用模式匹配写了一个解析器。贴在这里,希望对其他人有用。
%% @doc Split one CSV line into its fields.
%%
%% Commas inside a double-quoted section do not separate fields. The quote
%% characters themselves are kept in the returned fields, and empty fields
%% are returned as empty strings. An unterminated quote extends to the end
%% of the input. The empty string yields a single empty field.
-spec parse_csv(string()) -> [string()].
parse_csv(String) -> parse_csv(String, [], [], false).

%% parse_csv(Rest, Fields, Field, InQuotes)
%%   Rest     - input still to be consumed
%%   Fields   - completed fields, newest first
%%   Field    - characters of the current field, newest first
%%   InQuotes - true while between an opening and a closing quote
%%
%% A single reversed accumulator holds the whole current field, so text
%% before, inside and after a quoted section is never lost.
parse_csv([], Fields, Field, _InQuotes) ->
    lists:reverse([lists:reverse(Field) | Fields]);
parse_csv([$" | T], Fields, Field, InQuotes) ->
    %% A quote toggles the quoted state and stays part of the field.
    parse_csv(T, Fields, [$" | Field], not InQuotes);
parse_csv([$, | T], Fields, Field, false) ->
    %% Only a comma outside quotes ends the current field.
    parse_csv(T, [lists:reverse(Field) | Fields], [], false);
parse_csv([H | T], Fields, Field, InQuotes) ->
    parse_csv(T, Fields, [H | Field], InQuotes).
示例:
2> ql:parse_csv("foo,\"bar aa\",blah,\"dooo\",phew").
["foo","\"bar aa\"","blah","\"dooo\"","phew"]
3> ql:parse_csv("foo,bar,baz").
["foo","bar","baz"]
4> ql:parse_csv("foo,\"foo, bar\",baz").
["foo","\"foo, bar\"","baz"]
答案 2 :(得分:1)
这个怎么样:
1> string:tokens(S, ",").
["20140419","\"Blah blah"," foo foo\"","1","0","0","0","1","2","0","0"]
甚至:
2> re:split(S, ",", [{return,list}]).
["20140419","\"Blah blah"," foo foo\"","1","0","0","0","1","2","0","0"]
(@ kadaj,这是很多解析CSV的代码)
编辑:要正确回答这个问题,还需要把被拆开的 "\"…" 和 "…\"" 这一对重新组合起来。
为此,一个简单的递归函数就够了:
%% finish(Tokens, Acc) -> Fields
%%
%% Re-joins tokens produced by splitting a CSV line on every comma, so that
%% a double-quoted field which contained commas becomes one field again.
%%   Tokens - the pieces returned by the comma split, in order
%%   Acc    - fields collected so far, newest first (pass [] initially)
%% Returns the fields in their original order. The commas removed by the
%% split are re-inserted between the re-joined pieces. A quoted token that
%% is already closed (e.g. "\"dooo\"") is left untouched, and an
%% unterminated quote absorbs all remaining tokens.
finish([[$" | Tail] = Open | T], Acc) ->
    case has_closing_quote(Tail) of
        true ->
            finish(T, [Open | Acc]);
        false ->
            {Field, Rest} = join_quoted(T, [Open]),
            finish(Rest, [Field | Acc])
    end;
finish([H | T], Acc) ->
    finish(T, [H | Acc]);
finish([], Acc) ->
    lists:reverse(Acc).

%% True when the (non-empty) string ends with a double quote.
has_closing_quote([]) -> false;
has_closing_quote(Str) -> lists:last(Str) =:= $".

%% Appends tokens to the open quoted field until one ends with a quote.
%% Pieces holds the parts of the field, newest first; returns
%% {JoinedField, RemainingTokens}.
join_quoted([Token | T], Pieces) ->
    Joined = [Token, "," | Pieces],
    case has_closing_quote(Token) of
        true -> {lists:append(lists:reverse(Joined)), T};
        false -> join_quoted(T, Joined)
    end;
join_quoted([], Pieces) ->
    {lists:append(lists:reverse(Pieces)), []}.