我试图比较并发编程语言的性能,例如setTag,Go和Erlang。以下Go代码计算平方和,(重复计算R次方的平方和):
1 ^ 2 + 2 ^ 2 + 3 ^ 2 .... 1024 ^ 2
package main
import "fmt"
func mapper(in chan int, out chan int) {
for v := range in {out <- v*v}
}
func reducer(in1, in2 chan int, out chan int) {
for i1 := range in1 {i2 := <- in2; out <- i1 + i2}
}
func main() {
const N = 1024 // calculate sum of squares up to N; N must be power of 2
const R = 10 // number of repetitions to fill the "pipe"
var r [N*2]chan int
for i := range r {r[i] = make(chan int)}
var m [N]chan int
for i := range m {m[i] = make(chan int)}
for i := 0; i < N; i++ {go mapper(m[i], r[i + N])}
for i := 1; i < N; i++ {go reducer(r[i * 2], r[i *2 + 1], r[i])}
go func () {
for j := 0; j < R; j++ {
for i := 0; i < N; i++ {m[i] <- i + 1}
}
} ()
for j := 0; j < R; j++ {
<- r[1]
}
}
以下代码是Erlang中的MapReduce解决方案。我是Erlang的新手。我想比较Go,Haskell和Erlang之间的表现。我的问题是如何优化这个Erlang代码。我使用erlc -W mr.erl
编译此代码,并使用erl -noshell -s mr start -s init stop -extra 1024 1024
运行代码。是否有任何特殊的编译和执行选项可用于优化?我非常感谢您提供的任何帮助。
-module(mr).
-export([start/0, create/2, doreduce/2, domap/1, repeat/3]).
start()->
[Num_arg|Repeat] = init:get_plain_arguments(),
N = list_to_integer(Num_arg),
[R_arg|_] = Repeat,
R = list_to_integer(R_arg),
create(R, N).
create(R, Num) when is_integer(Num), Num > 0 ->
Reducers = [spawn(?MODULE, doreduce, [Index, self()]) || Index <- lists:seq(1, 2*Num - 1)],
Mappers = [spawn(?MODULE, domap, [In]) || In <- lists:seq(1, Num)],
reducer_connect(Num-1, Reducers, self()),
mapper_connect(Num, Num, Reducers, Mappers),
repeat(R, Num, Mappers).
repeat(0, Num, Mappers)->
send_message(Num, Mappers),
receive
{result, V}->
%io:format("Repeat: ~p ~p ~n", [0, V])
true
end;
repeat(R, Num, Mappers)->
send_message(Num, Mappers),
receive
{result, V}->
%io:format("Got: ~p ~p ~n", [R, V])
true
end,
repeat(R-1, Num, Mappers).
send_message(1, Mappers)->
D = lists:nth (1, Mappers),
D ! {mapper, 1};
send_message(Num, Mappers)->
D = lists:nth (Num, Mappers),
D ! {mapper, Num},
send_message(Num-1, Mappers).
reducer_connect(1, RList, Root)->
Parent = lists:nth(1, RList),
Child1 = lists:nth(2, RList),
Child2 = lists:nth(3, RList),
Child1 ! {connect, Parent},
Child2 ! {connect, Parent},
Parent !{connect, Root};
reducer_connect(Index, RList, Root)->
Parent = lists:nth(Index, RList),
Child1 = lists:nth(Index*2, RList),
Child2 = lists:nth(Index*2+1, RList),
Child1 ! {connect, Parent},
Child2 ! {connect, Parent},
reducer_connect(Index-1, RList, Root).
mapper_connect(1, Num, RList, MList)->
R = lists:nth(Num, RList),
M = lists:nth(1, MList),
M ! {connect, R};
mapper_connect(Index, Num, RList, MList) when is_integer(Index), Index > 0 ->
R = lists:nth(Num + (Index-1), RList),
M = lists:nth(Index, MList),
M ! {connect, R},
mapper_connect(Index-1, Num, RList, MList).
doreduce(Index, CurId)->
receive
{connect, Parent}->
doreduce(Index, Parent, 0, 0, CurId)
end.
doreduce(Index, To, Val1, Val2, Root)->
receive
{map, Val} ->
if Index rem 2 == 0 ->
To ! {reduce1, Val},
doreduce(Index, To, 0, 0, Root);
true->
To ! {reduce2, Val},
doreduce(Index, To, 0, 0, Root)
end;
{reduce1, V1} when Val2 > 0, Val1 == 0 ->
if Index == 1 ->% root node
Root !{result, Val2 + V1},
doreduce(Index, To, 0, 0, Root);
Index rem 2 == 0 ->
To ! {reduce1, V1+Val2},
doreduce(Index, To, 0, 0, Root);
true->
To ! {reduce2, V1+Val2},
doreduce(Index, To, 0, 0, Root)
end;
{reduce2, V2} when Val1 > 0, Val2 == 0 ->
if Index == 1 ->% root node
Root !{result, Val1 + V2},
doreduce(Index, To, 0, 0, Root);
Index rem 2 == 0 ->
To ! {reduce1, V2+Val1},
doreduce(Index, To, 0, 0, Root);
true->
To ! {reduce2, V2+Val1},
doreduce(Index, To, 0, 0, Root)
end;
{reduce1, V1} when Val1 == 0, Val2 == 0 ->
doreduce(Index, To, V1, 0, Root);
{reduce2, V2} when Val1 == 0, Val2 == 0 ->
doreduce(Index, To, 0, V2, Root);
true->
true
end.
domap(Index)->
receive
{connect, ReduceId}->
domap(Index, ReduceId)
end.
domap(Index, To)->
receive
{mapper, V}->
To !{map, V*V},
domap(Index, To);
true->
true
end.
答案 0 :(得分:2)
尽管根本不是Erlang的好任务,但有一个非常简单的解决方案:
-module(mr).
-export([start/1, start/2]).
start([R, N]) ->
Result = start(list_to_integer(R), list_to_integer(N)),
io:format("~B x ~B~n", [length(Result), hd(Result)]).
start(R, N) ->
Self = self(),
Reducer = start(Self, R, 1, N),
[ receive {Reducer, Result} -> Result end || _ <- lists:seq(1, R) ].
start(Parent, R, N, N) ->
spawn_link(fun() -> mapper(Parent, R, N) end);
start(Parent, R, From, To) ->
spawn_link(fun() -> reducer(Parent, R, From, To) end).
mapper(Parent, R, N) ->
[ Parent ! {self(), N*N} || _ <- lists:seq(1, R) ].
reducer(Parent, R, From, To) ->
Self = self(),
Middle = ( From + To ) div 2,
A = start(Self, R, From, Middle),
B = start(Self, R, Middle + 1, To),
[ Parent ! {Self, receive {A, X} -> receive {B, Y} -> X+Y end end}
|| _ <- lists:seq(1, R) ].
您可以使用
运行它$ erlc -W mr.erl
$ time erl -noshell -run mr start 1024 1024 -s init stop
1024 x 358438400
real 0m2.162s
user 0m4.177s
sys 0m0.151s
但大部分时间是VM启动和优雅停止开销
$ time erl -noshell -run mr start 1024 1024 -s erlang halt
1024 x 358438400
real 0m1.172s
user 0m4.110s
sys 0m0.150s
$ erl
1> timer:tc(fun() -> mr:start(1024,1024) end).
{978453,
[358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400,358438400,358438400,
358438400,358438400,358438400,358438400|...]}
请记住,它更像是一个优雅的解决方案,而不是一个有效的解决方案。一个有效的解决方案应该平衡减少树分支与通信开销。