我有以下测试模块(MyMod.jl)来存储Julia中的一些函数。一些核心功能是串行编写的。其他函数并行调用核心函数。
module MyMod
export Dummy,distribute_data,recombine_data,regular_test,parallel_test
function Dummy(icol,model,data,A,B)
nz,nx,nh = size(model) # = size(A) = size(B)
for ih = 1:nh
for ix = 1:nx
for iz = 1:nz
data[iz,icol] += A[iz,ix,ih]*B[iz,ix,ih]*model[iz,ix,ih]
end
end
end
end
function distribute_data(X, obj_name_on_worker::Symbol;mod=Main)
dim = length(size(X))
size_per_worker = floor(Int,size(X,1) / nworkers())
StartIdx = 1
EndIdx = size_per_worker
for (idx, pid) in enumerate(workers())
if idx == nworkers()
EndIdx = size(X,1)
end
println(StartIdx:EndIdx)
if dim == 3
@spawnat(pid, eval(mod, Expr(:(=), obj_name_on_worker, X[StartIdx:EndIdx,:,:])))
elseif dim == 2
@spawnat(pid, eval(mod, Expr(:(=), obj_name_on_worker, X[StartIdx:EndIdx,:])))
end
StartIdx = EndIdx + 1
EndIdx = EndIdx + size_per_worker - 1
end
end
function recombine_data(Data::Symbol;mod=Main)
Results = cell(nworkers())
for (idx, pid) in enumerate(workers())
Results[idx] = fetch(@spawnat(pid, getfield(mod, Data)))
end
return vcat(Results...)
end
function regular_test(model,data,A,B)
ncol=size(data,2)
map((arg)->Dummy(arg,model,data,A,B),[icol for icol = 1:ncol])
end
function parallel_test(model,data,A,B)
distribute_data(model, :model)
distribute_data(A, :A)
distribute_data(B, :B)
distribute_data(data, :data)
@everywhere ncol=size(data,2)
@everywhere begin
if myid() != 1
map((arg)->Dummy(arg,model,data,A,B),[icol for icol = 1:ncol])
end
end
P_Data = recombine_data(:data)
return P_Data
end
end
此模块按预期工作。当我打开Julia会话并运行以下命令时,我发现regular_test
提供的结果与parallel_test
相同,没有任何错误。
addprocs(3)
@everywhere using MyMod
nx = 250;
nz = 350;
nh = 150;
ncol = 125;
model = rand(nz,nx,nh);
data = SharedArray(Float64,nz,ncol);
A = rand(nz,nx,nh);
B = rand(nz,nx,nh);
@time P_Data = parallel_test(model,data,A,B);
@time regular_test(model,data,A,B);
P_Data == data
对于更大/更复杂的功能,以这种方式存储模块变得非常混乱。以前我通过将每个函数存储为单独的文件然后使用include(..)
将它们带入模块来清理事物。以下模块是我尝试这样做的:
module MyMod_2
export Dummy,distribute_data,recombine_data,regular_test,parallel_test
@everywhere include("Dummy.jl")
@everywhere include("distribute_data.jl")
@everywhere include("recombine_data.jl")
@everywhere include("regular_test.jl")
@everywhere include("parallel_test.jl")
end
其中每个函数作为单独的文件存储在与MyMod2
相同的目录中。但是,当我打开Julia会话并尝试运行与以前相同的命令集时,执行@everywhere using MyMod_2
时出现以下冗长错误
WARNING: Module MyMod_2 not defined on process 4
fatal error on WARNING: Module MyMod_2 not defined on process 3
4: fatal error on WARNING: Module MyMod_2 not defined on process 2
3: fatal error on 2: ERROR: UndefVarError: MyMod_2 not defined
in deserialize at serialize.jl:504
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:560
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:538
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:696
in deserialize_datatype at serialize.jl:651
in handle_deserialize at serialize.jl:465
in message_handler_loop at multi.jl:862
in process_tcp_streams at multi.jl:851
in anonymous at task.jl:63
ERROR: UndefVarError: MyMod_2 not defined
in deserialize at serialize.jl:504
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:560
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:538
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:696
in deserialize_datatype at serialize.jl:651
in handle_deserialize at serialize.jl:465
in message_handler_loop at multi.jl:862
in process_tcp_streams at multi.jl:851
in anonymous at task.jl:63
ERROR: UndefVarError: MyMod_2 not defined
in deserialize at serialize.jl:504
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:560
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:538
in handle_deserialize at serialize.jl:465
in deserialize at serialize.jl:696
in deserialize_datatype at serialize.jl:651
in handle_deserialize at serialize.jl:465
in message_handler_loop at multi.jl:862
in process_tcp_streams at multi.jl:851
in anonymous at task.jl:63
Worker 2 terminated.
ERROR: ProcessExitedException()
in yieldto at ./task.jl:71
in wait at ./task.jl:371
in wait at ./task.jl:286
in wait at ./channels.jl:63
in take! at ./channels.jl:53
in take! at ./multi.jl:803
in remotecall_fetch at multi.jl:729
in remotecall_fetch at multi.jl:734
in anonymous at multi.jl:1380
...and 3 other exceptions.
in sync_end at ./task.jl:413
in anonymous at multi.jl:1389
Worker 3 terminated.ERROR (unhandled task failure): EOFError: read end of file
julia> Worker 4 terminated.ERROR (unhandled task failure): EOFError: read end of file
julia>
fatal error on ERROR (unhandled task failure): EOFError: read end of file
1: ERROR: attempt to send to unknown socket
fatal error on 1: ERROR: attempt to send to unknown socket
我是否可以在MyMod_2
中更改以修复此错误并将其正确加载到Julia会话中?
答案 0 :(得分:2)
小调整 - 在您的模块中,省略@everywhere
宏。只需在加载模块时使用单@everywhere
即可。否则,您会遇到一种奇怪的递归类型情况,即工作人员都在执行模块中的代码(来自@everywhere using MyMod_2
,然后每个工作人员也调用{{1} })