筛选包含字符串模式的列表

时间:2018-09-17 09:13:08

标签: list filter erlang

我需要从列表中筛选出包含字符串 Status=ACTIVE 的元素,并将其另存为新列表。

之后我还需要用 Status=STOPPED 和 System=Windows 这两个条件过滤同一个列表。

列表:

DATETIME

想要的结果:

Programs=
    ["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
    "Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
    "Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
    "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].

和:

["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
    "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].

我找到了一种可能的解决方案:用 re:run/2 和 lists:filter/2 对其进行过滤。有没有更简单、更快捷的方法?

["Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED"].

Br,

达米安

4 个答案:

答案 0 :(得分:4)

第一个可以通过以下方式实现:

%% Keep every entry whose comma-separated fields include "Status=ACTIVE".
[Line || Line <- Programs,
         lists:member("Status=ACTIVE", string:tokens(Line, ","))].

第二个比较复杂,因为它需要验证两个条件:

%% Keep every entry whose comma-separated fields include both
%% "Status=STOPPED" and "System=Windows".
lists:filter(
    fun(Line) ->
        Fields = string:tokens(Line, ","),
        %% lists:all/2 stops at the first missing field, checking in list order.
        lists:all(fun(Wanted) -> lists:member(Wanted, Fields) end,
                  ["Status=STOPPED", "System=Windows"])
    end, Programs).

我不确定它是否更快,但至少可读性更高。

答案 1 :(得分:3)

尽管 choroba 的解决方案完全正确,我还是想谈谈 Erlang 中的最佳实践。一种很好的做法是:尽早把来自 Erlang 世界之外的数据转换为合适的 Erlang 数据结构。随着项目不断成长、变得更加复杂,需要正常的运维、调试和故障排查时,这种做法会在项目的长期开发和维护中得到回报。因此,通常应尽可能多地把文本数据解析为记录、原子、整数、数字等。这样做有两个好处。首先,您可以在系统边界处尽早验证传入的数据,防止错误在系统内部传播,进而可以采用快速失败(fail fast)的做法。其次,您可以编写许多辅助函数,使后续开发变得更加容易。例如:

%% @doc Parses program status lines of the form
%% "Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE" into
%% #program{} records and provides predicates for filtering them.
-module(programs).

%% API
-export([parse_programs/1, active/1, stopped/1, linux/1, windows/1]).

-record(program, {
          process,
          system,
          pid,
          program,
          status
         }).

-type system() :: 'Linux' | 'Windows'.
-type status() :: active | stopped.
-type program() :: #program{}.

%% API

%% @doc Parses every line in the list into a #program{} record.
%% Raises error:badarg on an unknown field, system or status, or when a
%% numeric field (Process, PID) is not an integer.
-spec parse_programs([string()]) -> [program()].
parse_programs(L) ->
    [parse_program(X) || X <- L].

%% @doc True when the program's status is `active'.
-spec active(program()) -> boolean().
active(P) -> P#program.status =:= active.

%% @doc True when the program's status is `stopped'.
-spec stopped(program()) -> boolean().
stopped(P) -> P#program.status =:= stopped.

%% @doc True when the program's system is 'Linux'.
-spec linux(program()) -> boolean().
linux(P) -> P#program.system =:= 'Linux'.

%% @doc True when the program's system is 'Windows'.
-spec windows(program()) -> boolean().
windows(P) -> P#program.system =:= 'Windows'.

%% Internal functions

%% Splits one line on "," and folds the resulting Key=Value tokens into a
%% fresh record. Fields that do not occur in the line stay `undefined'.
-spec parse_program(string()) -> program().
parse_program(Str) ->
    parse_program(string:tokens(Str, ","), #program{}).

%% Consumes the tokens one by one, dispatching on the "Key=" prefix.
-spec parse_program([string()], program()) -> program().
parse_program([], P) -> P;
parse_program(["Process=" ++ Str | T], P) ->
    parse_program(T, P#program{process = list_to_integer(Str)});
parse_program(["System=" ++ Str | T], P) ->
    parse_program(T, P#program{system = parse_system(Str)});
parse_program(["PID=" ++ Str | T], P) ->
    parse_program(T, P#program{pid = list_to_integer(Str)});
parse_program(["Program=" ++ Str | T], P) ->
    parse_program(T, P#program{program = Str});
parse_program(["Status=" ++ Str | T], P) ->
    parse_program(T, P#program{status = parse_status(Str)});
parse_program([_Unknown | _] = Tokens, P) ->
    %% erlang:error/2 takes the argument list of the *current* function, so
    %% pass both arguments of parse_program/2; the offending token is the
    %% head of Tokens in the resulting stacktrace.
    error(badarg, [Tokens, P]).

%% Maps the textual system name onto a fixed set of atoms (no dynamic atom
%% creation); anything else is rejected with badarg.
-spec parse_system(string()) -> system().
parse_system("Linux") -> 'Linux';
parse_system("Windows") -> 'Windows';
parse_system(Str) -> error(badarg, [Str]).

%% Maps the textual status onto a fixed set of atoms; anything else is
%% rejected with badarg.
-spec parse_status(string()) -> status().
parse_status("ACTIVE") -> active;
parse_status("STOPPED") -> stopped;
parse_status(Str) -> error(badarg, [Str]).

接下来的任务变得简单

1> c(programs).
{ok,programs}
2> rr("programs.erl").
[program]
3> Programs=
3>     ["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
3>     "Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
3>     "Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
3>     "Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
3>     "Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
3>     "Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
3>     "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
3>     "Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
3>     "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"].
["Process=1,System=Linux,PID=240,Program=DRMX,Status=ACTIVE",
 "Process=1,System=Linux,PID=240,Program=DRMX,Status=STOPPED",
 "Process=1,System=Windows,PID=240,Program=DRMX,Status=ACTIVE",
 "Process=1,System=Linux,PID=242,Program=DRMX,Status=ACTIVE",
 "Process=1,System=Windows,PID=242,Program=DRMX,Status=STOPPED",
 "Process=1,System=Windows,PID=242,Program=DRMX,Status=ACTIVE",
 "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE",
 "Process=1,System=Linux,PID=246,Program=DRMX,Status=STOPPED",
 "Process=1,System=Linux,PID=246,Program=DRMX,Status=ACTIVE"]
4> Ps = programs:parse_programs(Programs).
[#program{process = 1,system = 'Linux',pid = 240,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Linux',pid = 240,
          program = "DRMX",status = stopped},
 #program{process = 1,system = 'Windows',pid = 240,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Linux',pid = 242,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Windows',pid = 242,
          program = "DRMX",status = stopped},
 #program{process = 1,system = 'Windows',pid = 242, 
          program = "DRMX",status = active},
 #program{process = 1,system = 'Linux',pid = 246,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Linux',pid = 246,
          program = "DRMX",status = stopped},
 #program{process = 1,system = 'Linux',pid = 246,
          program = "DRMX",status = active}]
5> lists:filter(fun programs:active/1, Ps).
[#program{process = 1,system = 'Linux',pid = 240,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Windows',pid = 240,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Linux',pid = 242,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Windows',pid = 242,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Linux',pid = 246,
          program = "DRMX",status = active},
 #program{process = 1,system = 'Linux',pid = 246,
          program = "DRMX",status = active}]
6> lists:filter(fun(P) -> programs:stopped(P) andalso programs:windows(P) end, Ps).
[#program{process = 1,system = 'Windows',pid = 242,
          program = "DRMX",status = stopped}]

作为副作用,您的程序将消耗更少的内存,因为数字和原子占用的内存都远少于字符串。而且,后续的任何处理都会更快:原子之间的比较和数字比较一样快,而更少的内存占用意味着 CPU 缓存中可以容纳更多数据,CPU 缓存命中比访问主内存快两个数量级。

在这种情况下

7> erts_debug:size(Programs).                                         
1062
8> erts_debug:size(Ps).      
153

这意味着您可以在 CPU 缓存中保留将近七倍的数据。当您在进程之间或 Erlang 分布式节点之间发送消息时,速度也会快七倍,……(如果用 list_to_binary/1 处理程序名称,差距则是十倍。)

%% Variant of the "Program=" clause: stores the program name as a binary
%% (via list_to_binary/1) instead of as a string.
parse_program(["Program=" ++ Str | T], P) ->
    parse_program(T, P#program{program = list_to_binary(Str)});

然后

9> c(programs).
{ok,programs}
10> f(Ps).
ok
11> Ps = programs:parse_programs(Programs).
[{program,1,'Linux',240,<<"DRMX">>,active},
 {program,1,'Linux',240,<<"DRMX">>,stopped},
 {program,1,'Windows',240,<<"DRMX">>,active},
 {program,1,'Linux',242,<<"DRMX">>,active},
 {program,1,'Windows',242,<<"DRMX">>,stopped},
 {program,1,'Windows',242,<<"DRMX">>,active},
 {program,1,'Linux',246,<<"DRMX">>,active},
 {program,1,'Linux',246,<<"DRMX">>,stopped},
 {program,1,'Linux',246,<<"DRMX">>,active}]
12> erts_debug:size(Ps).
108

所以从8.3KiB下降到864B。

答案 2 :(得分:1)

我强烈建议将此类列表解析为记录列表,以规范化数据结构。这样一来,对记录的过滤就变得非常优雅:

%% Record describing one program entry.
-record(program, {
    process,
    system,
    pid,
    program,
    status
}).

% Parse list of strings to list of programs
ParsedPrograms = parseProgram(Programs),
%% Keep only the records whose status is "STOPPED" and whose system is
%% "Windows"; elements that do not match the generator pattern are skipped.
[P || #program{status = "STOPPED", system = "Windows"} = P <- ParsedPrograms].

答案 3 :(得分:0)

您也可以使用 string:str/2;不过由于 str 已过时,也可以改用 string:find/2。

%% Substring filters using string:str/2, which returns 0 when the substring
%% is absent. Note that this matches the text anywhere in the line, not a
%% whole Key=Value field.
ListA = [X || X <- Programs, string:str(X, "ACTIVE") > 0],
%% `andalso` is used rather than `and`: `and` binds tighter than `>`, so
%% `A > 0 and B > 0` would not be read as a conjunction of two comparisons.
ListB = [X || X <- Programs,
              string:str(X, "STOPPED") > 0 andalso string:str(X, "Windows") > 0].

%% Substring filters using string:find/2, which returns the atom `nomatch`
%% (not `undefined`) when the pattern is absent. Note that this matches the
%% text anywhere in the line, not a whole Key=Value field.
ListA = [X || X <- Programs, string:find(X, "ACTIVE") =/= nomatch],
%% `andalso` is used rather than `and`, which binds tighter than the
%% comparison operators.
ListB = [X || X <- Programs,
              string:find(X, "STOPPED") =/= nomatch andalso
              string:find(X, "Windows") =/= nomatch].