在 Erlang 中发送并行请求

时间:2018-04-12 23:04:26

标签: concurrency parallel-processing erlang

我正在用 Erlang 实现一个类似 Twitter 的应用程序,它有分布式和非分布式两个实现。我正在做基准测试,但似乎找不到向分布式实现中的每个用户进程并行发送请求的方法。我目前用 lists:foreach 把 get_tweets 请求发送到一个客户端进程列表。据我理解,lists:foreach 会依次处理列表中的每个元素,这使我的分布式实现实际上是顺序执行的,导致其执行时间与非分布式实现相当。是否可以一次性把 get_tweets 请求发送到不同的客户端进程?这个问题对我来说相当具体,在 StackOverflow 内外都很难搜到解决方案。

%% Benchmark: send 10 000 sequential get_tweets requests, each to a
%% randomly chosen client process, repeated for 30 measured runs by
%% run_benchmark/3.
test_get_tweets_Bench() ->
    %% ServerPid is never used below, so bind it as _ServerPid to avoid
    %% an "unused variable" compiler warning.
    {_ServerPid, UserInfos} = initializeForBench_server(),
    run_benchmark("timeline",
        fun () ->
            lists:foreach(fun (_) ->
                %% Destructure the {UserId, ClientPid} pair directly
                %% instead of element/2 calls: clearer and lets the
                %% compiler check the tuple shape.
                {UserId, ClientPid} = pick_random(UserInfos),
                server:get_tweets(ClientPid, UserId, 1)
            end,
            lists:seq(1, 10000))
        end,
        30).

%% Uniformly pick one element of a non-empty list (crashes on []).
pick_random(Items) ->
    Position = rand:uniform(length(Items)),
    lists:nth(Position, Items).

UserInfos 是以下格式的列表:[{UserId, ClientProcess}, ...]

改用 rpc:pmap 代替 lists:foreach 之后,我的基准测试反而慢了大约 3 倍。改动如下:

%% Benchmark variant using rpc:pmap/3 to issue the requests in parallel.
%% Note: pmap spawns one process per list element (10 000 here), so the
%% spawning/collection overhead can outweigh the gain when each request
%% is cheap — which matches the observed 3x slowdown.
test_get_tweets_Bench2() ->
    %% ServerPid is unused; bind as _ServerPid to silence the warning.
    {_ServerPid, UserInfos} = initializeForBench_server(),
    run_benchmark("get_tweets 2",
        fun () ->
            rpc:pmap({?MODULE, do_apply},
                     [fun (_) ->
                          %% Pattern-match the pair instead of element/2.
                          {UserId, ClientPid} = pick_random(UserInfos),
                          server:get_tweets(ClientPid, UserId, 1)
                      end],
                     lists:seq(1, 10000))
        end,
        30).


%% Return a uniformly random element of Elements.
%% rand:uniform/1 yields 1..N, exactly the valid lists:nth/2 index range.
pick_random(Elements) ->
    Size = length(Elements),
    lists:nth(rand:uniform(Size), Elements).

%% Apply Fun to Arg. Exists only so rpc:pmap/3 has a {Module, Function}
%% target; pmap cannot take a bare fun directly.
do_apply(Arg, Fun) ->
    Fun(Arg).

我认为rpc:pmap会使我的基准测试更快,因为它会并行发送get_tweet请求。

下面是我的服务器模块,它是我的基准测试和类似Twitter的应用程序之间的API。 API将我的基准测试请求发送到类似Twitter的应用程序。

    %% This module provides the protocol that is used to interact with an
%% implementation of a microblogging service.
%%
%% The interface is designed to be synchronous: it waits for the reply of the
%% system.
%%
%% This module defines the public API that is supposed to be used for
%% experiments. The semantics of the API here should remain unchanged.
-module(server).

-export([register_user/1,
         subscribe/3,
         get_timeline/3,
         get_tweets/3,
         tweet/3]).

%%
%% Server API
%%

%% Register a new user. Blocks until the server replies, then returns the
%% new user's id together with the pid that should be used for all
%% subsequent requests by this client.
-spec register_user(pid()) -> {integer(), pid()}.
register_user(ServerPid) ->
    ServerPid ! {self(), register_user},
    receive
        {ClientPid, registered_user, NewUserId} -> {NewUserId, ClientPid}
    end.

%% Subscribe/follow another user. The receive pattern repeats the already
%% bound UserId/UserIdToSubscribeTo, so only the acknowledgement for this
%% exact request is consumed from the mailbox.
-spec subscribe(pid(), integer(), integer()) -> ok.
subscribe(ServerPid, UserId, UserIdToSubscribeTo) ->
    ServerPid ! {self(), subscribe, UserId, UserIdToSubscribeTo},
    receive
        {_From, subscribed, UserId, UserIdToSubscribeTo} -> ok
    end.

%% Request a page of the timeline of a particular user.
%% Results can be 'paginated' by the server to reduce the amount of data
%% sent in a single response.
-spec get_timeline(pid(), integer(), integer()) -> [{tweet, integer(), erlang:timestamp(), string()}].
get_timeline(ServerPid, UserId, Page) ->
    ServerPid ! {self(), get_timeline, UserId, Page},
    receive
        {_From, timeline, UserId, Page, Entries} ->
            Entries
    end.

%% Request a page of tweets of a particular user.
%% Results can be 'paginated' by the server to reduce the amount of data
%% sent in a single response.
-spec get_tweets(pid(), integer(), integer()) -> [{tweet, integer(), erlang:timestamp(), string()}].
get_tweets(ServerPid, UserId, Page) ->
    ServerPid ! {self(), get_tweets, UserId, Page},
    receive
        {_From, tweets, UserId, Page, TweetList} ->
            TweetList
    end.

%% Submit a tweet for a user; returns the timestamp the server assigned.
%% (Authorization/security are not regarded in any way.)
-spec tweet(pid(), integer(), string()) -> erlang:timestamp().
tweet(ServerPid, UserId, Tweet) ->
    ServerPid ! {self(), tweet, UserId, Tweet},
    receive
        {_From, tweet_accepted, UserId, AcceptedAt} ->
            AcceptedAt
    end.

1 个答案:

答案 0 :(得分:0)

在 Erlang 中,消息是从进程 A 发送到进程 B 的点对点通信,没有广播或选择性广播之类的内置功能。在你的应用程序中,我看到 3 个步骤:

  1. 发送请求以获取用户的推文,
  2. 用户进程准备答案并将其发送回请求者
  3. 初始流程收集答案
  将请求发送到用户进程(步骤 1)以及收集推文(步骤 3)本身无法并行化。当然,你可以用多个进程来发送请求和收集答案(每个用户最多一个),但我想这不是你问题的重点。

    可行的做法是确保这 3 个步骤不是对每个用户进程顺序完成,而是并行执行。我猜函数 server:get_tweets 负责发送请求并收集答案。如果我没猜错(我无法确定,因为你没有提供该代码,而且你忽略了它的返回值),你可以把这个函数拆成两个来引入并行:第一个发送请求,第二个收集答案。(下面的示例代码我没有尝试运行甚至编译过,所以请谨慎参考 :o)

    %% Benchmark using split request/collect phases so the 10 000 requests
    %% are in flight concurrently instead of being answered one by one.
    test_get_tweets_Bench() ->
    %% ServerPid is unused below; bind as _ServerPid to avoid a warning.
    {_ServerPid, UserInfos} = initializeForBench_server(),
    run_benchmark("timeline",
        fun () ->
            %% Phase 1: fire all requests asynchronously, remembering each
            %% {Ref, UserId} pair so answers can be matched up later.
            Pending = lists:map(fun (_) ->
                {UserId, Pid} = pick_random(UserInfos),
                Ref = server:request_tweets(Pid, UserId),
                {Ref, UserId}
                end,
                lists:seq(1, 10000)),
            %% Phase 2: collect every answer. The original called
            %% collect(L, []) with an unbound variable L — fixed to use
            %% the list bound above.
            collect(Pending, [])
        end,
        30).
    
    %% Collect one answer per pending {Ref, UserId} entry.
    %% Returns {ok, [{UserId, Tweets}]} once every request is answered, or
    %% {error, timeout, StillPending, CollectedSoFar} if ?TIMEOUT elapses
    %% while answers are still outstanding.
    collect([], Result) -> {ok, Result};
    collect(List, ResultSoFar) ->
        receive
            {Ref, UserId, Tweets} ->
                {ok, NewList} = remove_pending_request(Ref, UserId, List),
                %% was `collect(Newlist, ...)` — unbound-variable typo
                collect(NewList, [{UserId, Tweets} | ResultSoFar])
        after ?TIMEOUT ->  %% the `->` after the timeout value was missing
            {error, timeout, List, ResultSoFar}
        end.
    
    %% Drop the pending entry keyed by Ref. The match on {Ref, UserId}
    %% also asserts the answer's UserId agrees with the pending record;
    %% crashes (badmatch) if Ref is unknown.
    remove_pending_request(Ref, UserId, Pending) ->
        {value, {Ref, UserId}, Remaining} = lists:keytake(Ref, 1, Pending),
        {ok, Remaining}.
    
    %% Choose one element of a non-empty list uniformly at random.
    pick_random(Candidates) ->
        Index = rand:uniform(length(Candidates)),
        lists:nth(Index, Candidates).
    

    这是我实施并行基准测试的另一种尝试,它没有实现任何加速。

    %% Worker: fetch one page of tweets for UserId (result discarded),
    %% then notify the spawning process so it can count completions.
    get_tweets(Collector, UserId, ServerProc) ->
        server:get_tweets(ServerProc, UserId, 0),
        Collector ! done_get_tweets.
    
    %% Benchmark attempt: spawn one worker per request, then wait for all
    %% of them to report back with done_get_tweets.
    test_get_tweets3() ->
        {_ServerId, UserInfos} = initializeForBench_server(),
        run_benchmark("parallel get_tweet",
            fun () ->
                %% Fire off one worker process per request.
                [begin
                     {UserId, Pid} = pick_random(UserInfos),
                     spawn(?MODULE, get_tweets, [self(), UserId, Pid])
                 end || _ <- lists:seq(1, ?NUMBER_OF_REQUESTS)],
                %% Block until every worker has signalled completion.
                [receive done_get_tweets -> ok end
                 || _ <- lists:seq(1, ?NUMBER_OF_REQUESTS)],
                ok
            end,
            ?RUNS).