使用 dask.array.map_overlap(x, func) 时,func 只接收 dask 数组这一个输入参数;我可以给它传递额外的参数吗?
我创建了一个伪(pseudo)包装函数,它只接收 dask 数组作为输入参数,并在 func(array) 内部用该数组和其他参数调用真正的函数。但是我遇到了客户端序列化错误。
我正在 HPC 节点上使用 Dask 进行地震数据计算。我有一个用 Cython 编写的 funcA,它接收 dask 数组和另外 3 个参数。我使用 da.map_overlap 来调用 funcA:如果只传递 dask 数组这一个参数,它可以正常工作;但是,如果我传递额外的参数,则会出现客户端序列化错误。
首先,我在abc.pyx
中定义一个函数
#Cython code to compute abc
cimport cython
import numpy as np
cimport numpy as np
# Element type used for the output volume (and expected of the input).
DTYPE = np.float32
ctypedef np.float32_t DTYPE_t


@cython.boundscheck(False)  # turn off bounds-checking for entire function
@cython.wraparound(False)  # turn off negative index wrapping for entire function
cpdef np.ndarray[DTYPE_t, ndim=3] abc(np.ndarray[DTYPE_t, ndim=3] f, int x, int y, int z):
    """Return a zero-filled float32 volume with the same shape as ``f``.

    ``x``, ``y`` and ``z`` are copied into local window variables, but in
    this version of the kernel they do not influence the returned array.
    """
    cdef int nx = f.shape[0]
    cdef int ny = f.shape[1]
    cdef int nz = f.shape[2]
    # Window sizes per axis, taken straight from the arguments.
    cdef int wx = x, wy = y, wz = z
    output = np.zeros((nx, ny, nz), dtype=DTYPE)
    return output
我使用 python setup.py build_ext --inplace 生成 abc.so 文件,并将 abc.so 复制到 site-packages 目录。
我有compute_abc.py
:
import dask.array as da
from abc import abc
from dask.distributed import wait
import os
class compute_abc(object):
    """Apply the ``abc`` kernel to a dask array using overlapping chunks.

    Usage: construct, call :meth:`set_params`, then :meth:`run`.
    """

    def __init__(self):
        # Stays False until set_params() has supplied the client and paths.
        self.validated = False
        # Default window sizes; run() also uses them as the overlap depth
        # along axes 0, 1 and 2.
        self.win_x = 1
        self.win_y = 1
        self.win_z = 4

    def set_params(self, client, in_file, out_file, parameters=None):
        """Store the client, input, output path and optional window sizes.

        Args:
            client: object whose ``persist`` method is called in run().
            in_file: either a dask array or something ``da.from_zarr`` accepts.
            out_file: destination passed to ``to_zarr``.
            parameters: optional mapping; the keys ``"win_x"``, ``"win_y"``
                and ``"win_z"`` override the defaults (converted with ``int``).
        """
        self.client = client
        self.in_file = in_file
        self.output_path = out_file
        self.validated = True
        if parameters is not None:
            for key in ("win_x", "win_y", "win_z"):
                if key in parameters:
                    setattr(self, key, int(parameters[key]))

    def run(self):
        """Run ``abc`` over the input with overlap and write the result.

        Returns:
            ``(chunkdata, daskout)`` — the input array and the persisted
            result — or ``None`` (after printing a message) when
            set_params() has not been called.
        """
        if not self.validated:
            print("Can't run, parameters were not validated")
            return

        # Imported here so the fix does not depend on the file's import block.
        from functools import partial

        if isinstance(self.in_file, da.core.Array):
            chunkdata = self.in_file
        else:
            chunkdata = da.from_zarr(self.in_file)
            chunkdata = self.client.persist(chunkdata)

        # Bind the extra kernel arguments as plain ints. A closure over
        # ``self`` would pull the whole object, including the client handle,
        # into the task graph (presumably what failed to serialize), and a
        # keyword literally named ``x`` could clash with a map_overlap
        # signature whose first parameter is also called ``x``.
        kernel = partial(abc, x=self.win_x, y=self.win_y, z=self.win_z)

        depth = {0: self.win_x, 1: self.win_y, 2: self.win_z}
        boundary = {0: 0, 1: 0, 2: 0}
        daskout = chunkdata.map_overlap(
            kernel,
            depth=depth,
            boundary=boundary,
            trim=True,
            dtype=chunkdata.dtype,
        )
        daskout = self.client.persist(daskout)

        # Only write when the parent directory of the output path exists.
        tmppath = os.path.dirname(self.output_path)
        if os.path.isdir(tmppath):
            daskout.to_zarr(self.output_path)
        else:
            print('Invalid path')
        return chunkdata, daskout
我有一个测试脚本用来调用 compute_abc:
import sys
from . import compute_abc
from dask.distributed import Client
import ast
def main(argv):
    """Configure a compute_abc instance with fixed test parameters and run it.

    Args:
        argv: command-line arguments; currently unused.
    """
    # NOTE(review): `from . import compute_abc` binds a module, not the class;
    # confirm that `compute_abc` here really refers to the class.
    param = ast.literal_eval('{"win_x": 1, "win_y": 1, "win_z": 4}')
    comp_abc = compute_abc()
    client = Client()
    comp_abc.set_params(client, '../data_zarr/seismic/seismic.zarr', '../dataOut/seismic/seismic_semblance.zarr', param)
    # run() must be called on the configured instance, not on the class name.
    data_in, res_abc = comp_abc.run()


if __name__ == "__main__":
    main(sys.argv[1:])
我希望daskout
包含daskarray作为计算结果。但是我收到了如下的客户端序列化错误:
daskout.to_zarr(self.output_path)
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/dask/array/core.py", line 2228, in to_zarr
return to_zarr(self, *args, **kwargs)
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/dask/array/core.py", line 2808, in to_zarr
**kwargs
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/zarr/creation.py", line 120, in create
chunk_store=chunk_store, filters=filters, object_codec=object_codec)
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/zarr/storage.py", line 323, in init_array
object_codec=object_codec)
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/zarr/storage.py", line 337, in _init_array_metadata
distributed.protocol.core - CRITICAL - Failed to Serialize
Traceback (most recent call last):
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/distributed/protocol/core.py", line 46, in dumps
for key, value in data.items()
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/distributed/protocol/core.py", line 47, in <dictcomp>
if type(value) is Serialize
File "/glb/data/cdis_projects/users/ussnis/miniconda3/envs/pvenv/lib/python3.7/site-packages/distributed/protocol/serialize.py", line 164, in serialize
raise TypeError(msg, str(x)[:10000])
TypeError: ('Could not serialize object of type tuple.', '(subgraph_callable, (subgraph_callable, (<built-in function getitem>, (<function concatenate3 at 0x2b7d1278f400>, (<function concrete at 0x2b7d119f62f0>, [[["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 1.1, 0.09999999999999998)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 1.1, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 1.1, 1.9)"], ["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 2, 0.09999999999999998)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 2, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 2, 1.9)"], ["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 2.9, 0.09999999999999998)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 2.9, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 2.1, 2.9, 1.9)"]], [["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 1.1, 0.09999999999999998)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 1.1, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 1.1, 1.9)"], ["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 2, 0.09999999999999998)", "(\'from-zarr-1b4273213cbfa6c35d3560d23e8e8c89\', 2, 1, 0)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 2, 1.9)"], ["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 2.9, 0.09999999999999998)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 2.9, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3, 2.9, 1.9)"]], [["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 1.1, 0.09999999999999998)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 1.1, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 1.1, 1.9)"], ["(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 2, 0.09999999999999998)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 2, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 2, 1.9)"], ["(\'concatenate-getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 2.9, 0.09999999999999998)", 
"(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 2.9, 1)", "(\'getitem-33b7b6bef20ae67990d1e0e68e6462ff\', 3.9, 2.9, 1.9)"]]])), (slice(None, None, None), slice(None, None, None), slice(None, None, None))), "(\'from-zarr-1b4273213cbfa6c35d3560d23e8e8c89\', 2, 1, 0)"), "(\'from-zarr-1b4273213cbfa6c35d3560d23e8e8c89\', 2, 1, 0)")')
err_contains_array(path)
如果我使abc函数仅将数据作为输入参数,并在abc函数中硬编码了x,y,z,则可以正常工作。
答案 0 :(得分:0)
我找到了在调用 da.map_overlap() 时为函数传递额外参数的方法:把它们作为关键字参数传入,即 da.map_overlap(array, func, depth, boundary, trim, param1=value, param2=value, ...)。