为什么我的监督者(supervisor)会终止?

时间:2012-08-23 16:34:35

标签: erlang otp supervisor gen-server

我刚接触OTP,正在尝试写一个简单的例子来理解监督者(supervisor)的行为:

这是简单的增量服务器

-module( inc_serv ).
-behaviour( gen_server ).
-export( [ start/0, inc/1, stop/0 ] ).
-export( [ init/1, handle_call/3, terminate/2 ] ).

%% @doc Starts the increment server, registered locally under the module
%% name and linked to the calling process. The server is initialised with
%% the placeholder argument `no_args'.
start() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, no_args, []).

%% @doc Synchronously asks the locally registered server to stop.
%% Returns whatever the server replies to the `stop' request.
stop() ->
    Server = ?MODULE,
    gen_server:call(Server, stop).

%% @doc Sends `Num' to the locally registered server in a synchronous call
%% and returns the server's reply. If the server crashes while handling the
%% request, the exit propagates to the caller through gen_server:call/2.
inc(Num) ->
    Request = {num, Num},
    gen_server:call(?MODULE, Request).

%% @doc gen_server callback: announces start-up on standard output and
%% returns the constant state `no_state' (the server keeps no real state).
init(no_args) ->
    Banner = "Increment server started :)",
    %% ~p prints the string as an Erlang term, so it shows up quoted.
    io:format("~p~n", [Banner]),
    {ok, no_state}.

%% @doc gen_server callback for synchronous requests.
%%
%% `{num, N}' replies with `N + 1' and keeps the state unchanged. A
%% non-numeric `N' makes the addition fail with `badarith', which crashes
%% the server on purpose (let it crash).
%% `stop' replies `ok' and stops the server with reason `normal'.
handle_call({num, N}, _Caller, no_state = State) ->
    Next = N + 1,
    {reply, Next, State};
handle_call(stop, _Caller, no_state = State) ->
    {stop, normal, ok, State}.

%% @doc gen_server callback invoked when the server is about to terminate;
%% prints a notice on standard output.
%%
%% The termination reason is not used, so it is bound to `_Reason' to avoid
%% an "unused variable" compiler warning.
terminate(_Reason, no_state) ->
    %% ~p prints the string as an Erlang term, so it shows up quoted.
    io:format("~p~n", ["Increment server stopped"]).

我想通过这个模块进行监督:

-module( supervisor_inc ).
-behaviour( supervisor ).

-export( [ start/0 ] ).
-export( [ init/1 ] ).

%% @doc Starts the supervisor, registered locally under the module name and
%% linked to the calling process. Because of the link, an abnormal exit of
%% the caller (for example the shell process) also terminates the supervisor.
start() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, no_args).

%% @doc supervisor callback: returns the restart strategy and the child
%% specification for the single `inc_serv' worker.
%%
%% No explicit `process_flag(trap_exit, true)' is needed here: the
%% supervisor behaviour already traps exits before it calls this callback.
%% Trapping exits also does not keep a supervisor alive when the process it
%% is linked to as parent exits.
init(no_args) ->
    %% one_for_one: only the crashed child is restarted.
    %% At most 1 restart within 1 second is tolerated.
    SupFlags = {one_for_one, 1, 1},
    IncServ = {
        inc_serv,                 % child id
        {inc_serv, start, []},    % start function {M, F, A}
        permanent,                % always restart
        2000,                     % shutdown timeout in milliseconds
        worker,
        [inc_serv]                % callback modules
    },
    {ok, {SupFlags, [IncServ]}}.

之后我在erlang shell中执行了下一步:

1> 
1> c(inc_serv).
{ok,inc_serv}
2> 
2> c(supervisor_inc).
{ok,supervisor_inc}
3> 
3> supervisor_inc:start().
"Increment server started :)"
{ok,<0.43.0>}
4> 
4> inc_serv:inc( 7 ).
8
5> inc_serv:inc( 8 ).
9

然后我尝试了下面的调用(如我所料,出现了错误):

6> inc_serv:inc( bad_arg ).
"Increment server stopped"
"Increment server started :)"

=ERROR REPORT==== 23-Aug-2012::19:32:06 ===
** Generic server inc_serv terminating 
** Last message in was {num,bad_arg}
** When Server state == no_state
** Reason for termination == 
** {badarith,[{inc_serv,handle_call,3,[{file,"inc_serv.erl"},{line,22}]},
              {gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,588}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,227}]}]}

=ERROR REPORT==== 23-Aug-2012::19:32:06 ===
** Generic server supervisor_inc terminating 
** Last message in was {'EXIT',<0.31.0>,
                           {{{badarith,
                                 [{inc_serv,handle_call,3,
                                      [{file,"inc_serv.erl"},{line,22}]},
                                  {gen_server,handle_msg,5,
                                      [{file,"gen_server.erl"},{line,588}]},
                                  {proc_lib,init_p_do_apply,3,
                                      [{file,"proc_lib.erl"},{line,227}]}]},
                             {gen_server,call,[inc_serv,{num,bad_arg}]}},
                            [{gen_server,call,2,
                                 [{file,"gen_server.erl"},{line,180}]},
                             {erl_eval,do_apply,6,
                                 [{file,"erl_eval.erl"},{line,576}]},
                             {shell,exprs,7,[{file,"shell.erl"},{line,668}]},
                             {shell,eval_exprs,7,
                                 [{file,"shell.erl"},{line,623}]},
                             {shell,eval_loop,3,
                                 [{file,"shell.erl"},{line,608}]}]}}
** When Server state == {state,
                            {local,supervisor_inc},
                            one_for_one,
                            [{child,<0.48.0>,inc_serv,
                                 {inc_serv,start,[]},
                                 permanent,2000,worker,
                                 [inc_serv]}],
                            undefined,1,1,
                            [{1345,739526,107495}],
                            supervisor_inc,no_args}
** Reason for termination == 
** {{{badarith,[{inc_serv,handle_call,3,[{file,"inc_serv.erl"},{line,22}]},
                {gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,588}]},
                {proc_lib,init_p_do_apply,3,
                          [{file,"proc_lib.erl"},{line,227}]}]},
     {gen_server,call,[inc_serv,{num,bad_arg}]}},
    [{gen_server,call,2,[{file,"gen_server.erl"},{line,180}]},
     {erl_eval,do_apply,6,[{file,"erl_eval.erl"},{line,576}]},
     {shell,exprs,7,[{file,"shell.erl"},{line,668}]},
     {shell,eval_exprs,7,[{file,"shell.erl"},{line,623}]},
     {shell,eval_loop,3,[{file,"shell.erl"},{line,608}]}]}
** exception exit: {{badarith,[{inc_serv,handle_call,3,
                                         [{file,"inc_serv.erl"},{line,22}]},
                               {gen_server,handle_msg,5,
                                           [{file,"gen_server.erl"},{line,588}]},
                               {proc_lib,init_p_do_apply,3,
                                         [{file,"proc_lib.erl"},{line,227}]}]},
                    {gen_server,call,[inc_serv,{num,bad_arg}]}}
     in function  gen_server:call/2 (gen_server.erl, line 180)

在此之后,我预期我的监督者(supervisor)会重新启动inc_serv,但事实并非如此:

7> inc_serv:inc( 8 ).      
** exception exit: {noproc,{gen_server,call,[inc_serv,{num,8}]}}
     in function  gen_server:call/2 (gen_server.erl, line 180)
你可以帮我理解发生了什么吗?我应该如何重写我的监督者(supervisor),使其能够重新启动inc_serv?

谢谢

1 个答案:

答案 0 :(得分:22)

这实际上是一种竞争条件。

您可能知道,Erlang shell本身就是一个普通的Erlang进程。当您从shell启动监督者(supervisor)时,监督者会链接到shell进程(因为您使用的是supervisor:start_link/3)。

当您调用gen_server进程时,该进程崩溃(并由监督者(supervisor)正确地重新启动,正如随后输出的"Increment server started :)"所示)。

然而,与此同时,您对gen_server:call/2的调用也会以同样的原因失败(如果gen_server在处理调用期间崩溃,gen_server:call/2也会在调用方进程中抛出同样的退出异常)。这会导致shell进程崩溃;而shell进程与您的监督者(supervisor)相链接,于是监督者也因同样的原因(badarith)崩溃。

基本上,您的监督者(supervisor)在忠实地重新启动gen_server之后,却被您的shell进程"背叛"了。如下图所示:

       +---------(6)exit----------+    +---------(5)restart---------+
       |                          |    |                            |
       |                          v    |                            v
     Shell ---(1)start_link---> supervisor ---(2)start_link---> gen_server
     |  ^                         ^    |                         ^  |   ^
     |  |                         |    |                         |  |   |
     |  |                         |    +---------(7)exit---------+  |   |
     |  |                         |                                 |   |
     |  +-------------------------+--------------(4)exit------------+   |
     |                                                                  |
     +---------------------------(3)call--------------------------------+

您可以在shell中改用catch inc_serv:inc(bad_arg).来调用,从而避免这种情况:

90> inc_serv:inc(7).        
8
91> catch inc_serv:inc(bad_arg).
"Increment server stopped"

=ERROR REPORT==== 23-Aug-2012::22:10:02 ===
** Generic server inc_serv terminating 
** Last message in was {num,bad_arg}
** When Server state == no_state
** Reason for termination == 
** {badarith,[{inc_serv,handle_call,3,[{file,"inc_serv.erl"},{line,20}]},
              {gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,588}]},
              {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,227}]}]}
"Increment server started :)"
{'EXIT',{{badarith,[{inc_serv,handle_call,3,
                              [{file,"inc_serv.erl"},{line,20}]},
                    {gen_server,handle_msg,5,
                              [{file,"gen_server.erl"},{line,588}]},
                    {proc_lib,init_p_do_apply,3,
                              [{file,"proc_lib.erl"},{line,227}]}]},
                    {gen_server,call,[inc_serv,{num,bad_arg}]}}}
92> inc_serv:inc(7).            
8