Question

我需要像这样拆分一个二进制数据（binary）：

Bin = <<"Hello my friend">>.
split_by_space(Bin).

并获得：

[<<"Hello">>, <<"my">>, <<"friend">>]

Answer 1

如果您不想使用标准库，可以使用：

-module(split).

%% Public interface.
-export([split/1]).

%% @doc Split a binary into the words separated by space characters.
%% Leading, trailing and repeated spaces never produce empty words, so an
%% empty or all-space input yields the empty list.
-spec split(binary()) -> [binary()].
split(Bin) when is_binary(Bin) ->
    collect(Bin, <<>>, []).

%% Walks the input one byte at a time. `Word' holds the bytes of the word
%% currently being read; `Acc' holds the finished words in reverse order.

%% Input exhausted and no word in progress: return the collected words.
collect(<<>>, <<>>, Acc) ->
    lists:reverse(Acc);
%% Input exhausted while a word is in progress: that word is the last one.
collect(<<>>, Word, Acc) ->
    lists:reverse([Word | Acc]);
%% Space while no word is in progress (leading or repeated space): skip it.
collect(<<$\s, Tail/binary>>, <<>>, Acc) ->
    collect(Tail, <<>>, Acc);
%% Space directly after a word: the word is complete, start a fresh one.
collect(<<$\s, Tail/binary>>, Word, Acc) ->
    collect(Tail, <<>>, [Word | Acc]);
%% Any other byte is appended to the word in progress.
collect(<<Byte, Tail/binary>>, Word, Acc) ->
    collect(Tail, <<Word/binary, Byte>>, Acc).

测试上面的代码：

1> split:split(<<"test">>).
[<<"test">>]
2> split:split(<<"  test  ">>).
[<<"test">>]
3> split:split(<<"  te st  ">>).
[<<"te">>,<<"st">>]
4> split:split(<<"">>).         
[]
5> split:split(<<"     ">>).
[]

Answer 2

你可以简单地使用lexemes：

http://erlang.org/doc/man/string.html

lexemes(String :: unicode:chardata(), SeparatorList :: [grapheme_cluster()]) -> [unicode:chardata()]

返回 String 中的词素（lexeme）列表，各词素以 SeparatorList 中的字素簇（grapheme cluster）为分隔符。

string:lexemes("foo bar", " ").
["foo","bar"]
string:lexemes(<<"foo bar">>, " ").
[<<"foo">>,<<"bar">>]

另一个功能是拆分：

string:split(<<"foo bar">>, " ", trailing).
[<<"foo">>,<<"bar">>]

Answer 3

比 Pouriya 的方案更简单，效率也高出 2–10 倍：

%% Entry point: split a binary into its space-separated words.
%% Anything that is not a binary raises badarg, with the offending argument
%% recorded in the error's argument list.
split(<<Input/binary>>) ->
    skip_spaces(Input);
split(Other) ->
    erlang:error(badarg, [Other]).

%% Drop any spaces at the front of the input, then hand what is left to
%% get_word/2 with an initial candidate length of one byte.
%% Returns [] when the input is empty or only spaces remain.
skip_spaces(<<$\s, Tail/binary>>) ->
    %% Still inside a run of spaces: keep skipping.
    skip_spaces(Tail);
skip_spaces(<<>>) ->
    %% Nothing left to read.
    [];
skip_spaces(Data) when is_binary(Data) ->
    %% First byte is not a space, so a word starts here.
    get_word(Data, 1).

%% Find the end of the word at the front of Data.
%% Len is the candidate word length in bytes; it grows by one per call until
%% the Len-byte prefix is either followed by a space or is the whole binary.
%% Returns the word followed by the words found in the rest of the input.
get_word(Data, Len) ->
    case Data of
        <<Word:Len/binary, $\s, Tail/binary>> ->
            %% A space follows the prefix: emit it and continue after the space.
            [Word | skip_spaces(Tail)];
        <<Word:Len/binary>> ->
            %% The prefix is the entire remaining input: this is the last word.
            [Word];
        _ ->
            %% Neither pattern matched: retry with a prefix one byte longer.
            get_word(Data, Len + 1)
    end.

它在普通CPU上以15-40MB / s的速度进行解析。

Answer 4

没什么大不了的，你可以使用binary:split/3：

1> Bin = <<"Hello my friend">>.
<<"Hello my friend">>
2> binary:split(Bin, <<" ">>, [global]).
[<<"Hello">>,<<"my">>,<<"friend">>]
3>

如何在 Erlang 中按空格拆分二进制数据（binary）？

4 个答案: