我需要像这样按空格拆分一个二进制数据(binary):
Bin = <<"Hello my friend">>.
split_by_space(Bin).
并获得:
[<<"Hello">>, <<"my">>, <<"friend">>]
答案 0 :(得分:1)
如果您不想使用标准库,可以使用:
-module(split).
%% API:
-export([split/1]).
%% Splits Bin on ASCII spaces and returns the non-empty words in their
%% original order. Leading, trailing and repeated spaces never produce
%% empty words. A non-binary argument fails with function_clause.
split(Bin) when is_binary(Bin) ->
    split(Bin, <<>>, []).

%% split(Input, Word, Words)
%%   Input - the part of the binary that has not been scanned yet
%%   Word  - bytes of the word currently being collected
%%   Words - completed words, most recent first
%%
%% Input exhausted: keep the pending word (if any) and restore the order.
split(<<>>, Word, Words) ->
    lists:reverse(push_word(Word, Words));
%% A space closes the current word and starts a fresh, empty one.
split(<<$\s, Tail/binary>>, Word, Words) ->
    split(Tail, <<>>, push_word(Word, Words));
%% Any other byte is appended to the word being collected.
split(<<Byte, Tail/binary>>, Word, Words) ->
    split(Tail, <<Word/binary, Byte>>, Words).

%% Prepends Word to Words, unless Word is empty (nothing was collected
%% between two separators).
push_word(<<>>, Words) -> Words;
push_word(Word, Words) -> [Word | Words].
测试上面的代码:
1> split:split(<<"test">>).
[<<"test">>]
2> split:split(<<" test ">>).
[<<"test">>]
3> split:split(<<" te st ">>).
[<<"te">>,<<"st">>]
4> split:split(<<"">>).
[]
5> split:split(<<" ">>).
[]
答案 1 :(得分:0)
你可以简单地使用lexemes:
http://erlang.org/doc/man/string.html
lexemes(String :: unicode:chardata(), SeparatorList :: [grapheme_cluster()]) -> [unicode:chardata()]
返回 String 中的词素(lexeme)列表,各词素以 SeparatorList 中的字素簇(grapheme cluster)作为分隔符。
string:lexemes("foo bar", " ").
["foo","bar"]
string:lexemes(<<"foo bar">>, " ").
[<<"foo">>,<<"bar">>]
另一个功能是拆分:
string:split(<<"foo bar">>, " ", trailing).
[<<"foo">>,<<"bar">>]
答案 2 :(得分:0)
比 Pouriya 的解决方案更简单,效率高出 2-10 倍:
%% Splits a binary on ASCII spaces and returns the list of non-empty words
%% in order of appearance. Raises badarg for any non-binary argument.
split(Bin) when is_binary(Bin) ->
    skip_spaces(Bin);
split(Other) ->
    error(badarg, [Other]).

%% Drops leading spaces. Returns [] when nothing is left; otherwise starts
%% looking for the end of the word that begins at the first byte.
skip_spaces(<<>>) ->
    [];
skip_spaces(<<$\s, Tail/binary>>) ->
    skip_spaces(Tail);
skip_spaces(<<Input/binary>>) ->
    get_word(Input, 1).

%% Input starts with a word whose first Len bytes are known not to be
%% followed by a separator yet. Len grows by one byte per step until the
%% word is terminated by a space or by the end of the input.
get_word(Input, Len) when byte_size(Input) =:= Len ->
    %% The candidate covers the whole input: this is the last word.
    [Input];
get_word(Input, Len) ->
    case Input of
        <<Word:Len/binary, $\s, Tail/binary>> ->
            %% A space right after the candidate closes the word.
            [Word | skip_spaces(Tail)];
        _ ->
            %% The next byte still belongs to the word.
            get_word(Input, Len + 1)
    end.
它在普通CPU上以15-40MB / s的速度进行解析。
答案 3 :(得分:0)
没什么大不了的,你可以使用 binary:split/3:
1> Bin = <<"Hello my friend">>.
<<"Hello my friend">>
2> binary:split(Bin, <<" ">>, [global]).
[<<"Hello">>,<<"my">>,<<"friend">>]
3>