Saving a Dask array built from delayed functions that acquire a distributed Lock fails with:
TypeError: can't pickle _thread._local objects
Merely computing the Dask array does not raise the error; only saving it does.
Test script:
import numpy as np
import dask
from dask.distributed import Client, Lock
import time
import dask.array as da


@dask.delayed
def locked_load(fn):
    # Guard the file read with a distributed lock shared by all workers.
    lock = Lock('numpy-read')
    lock.acquire()
    out = np.load(fn)
    lock.release()
    return out


def make(arr_size, n_parts, use_lock=True):
    # Write n_parts identical .npy files, then build a stacked Dask array
    # whose chunks are loaded lazily through locked_load().
    x = np.arange(arr_size, dtype=np.int)
    for i in range(n_parts):
        np.save('%d.npy' % i, x)
    d = [locked_load('%d.npy' % i) for i in range(n_parts)]
    z = [da.from_delayed(a, (arr_size,), np.int) for a in d]
    return da.vstack(z)


def main():
    client = Client()
    a = make(10, 2, True)
    print(a.compute())            # this works.
    a.to_hdf5('test.hdf5', '/a')  # this doesn't.
    client.close()


if __name__ == '__main__':
    main()
Result:
[[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]]
<...>/site-packages/h5py/__init__.py:36: FutureWarning:
Conversion of the second argument of issubdtype from `float` to
`np.floating` is deprecated. In future, it will be treated as
`np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Traceback (most recent call last):
File "<...>/site-packages/distributed/protocol/pickle.py", line 38, in dumps
result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
TypeError: can't pickle _thread._local objects
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "lock5.py", line 34, in <module>
main()
File "lock5.py", line 29, in main
a.to_hdf5('test.hdf5', '/a')
File "<...>/site-packages/dask/array/core.py", line 1243, in to_hdf5
return to_hdf5(filename, datapath, self, **kwargs)
File "<...>/site-packages/dask/array/core.py", line 3783, in to_hdf5
store(list(data.values()), dsets)
File "<...>/site-packages/dask/array/core.py", line 961, in store
result.compute(**kwargs)
File "<...>/site-packages/dask/base.py", line 156, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "<...>/site-packages/dask/base.py", line 402, in compute
results = schedule(dsk, keys, **kwargs)
File "<...>/site-packages/distributed/client.py", line 1993, in get
resources=resources)
File "<...>/site-packages/distributed/client.py", line 1954, in _graph_to_futures
'tasks': valmap(dumps_task, dsk3),
File "cytoolz/dicttoolz.pyx", line 165, in cytoolz.dicttoolz.valmap (cytoolz/dicttoolz.c:3277)
File "cytoolz/dicttoolz.pyx", line 190, in cytoolz.dicttoolz.valmap (cytoolz/dicttoolz.c:3124)
File "<...>/site-packages/distributed/worker.py", line 703, in dumps_task
'args': pickle.dumps(task[1:])}
File "<...>/site-packages/distributed/protocol/pickle.py", line 51, in dumps
return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
File "<...>/site-packages/cloudpickle/cloudpickle.py", line 881, in dumps
cp.dump(obj)
File "<...>/site-packages/cloudpickle/cloudpickle.py", line 268, in dump
return Pickler.dump(self, obj)
File "<...>/python3.6/pickle.py", line 409, in dump
self.save(obj)
File "<...>/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "<...>/python3.6/pickle.py", line 751, in save_tuple
save(element)
File "<...>/python3.6/pickle.py", line 521, in save
self.save_reduce(obj=obj, *rv)
File "<...>/python3.6/pickle.py", line 634, in save_reduce
save(state)
File "<...>/python3.6/pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "<...>/python3.6/pickle.py", line 821, in save_dict
self._batch_setitems(obj.items())
File "<...>/python3.6/pickle.py", line 847, in _batch_setitems
save(v)
File "<...>/python3.6/pickle.py", line 496, in save
rv = reduce(self.proto)
TypeError: can't pickle _thread._local objects
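Possible workaround (a sketch only, inferred from the traceback above, which shows the failure while pickling the task graph that to_hdf5 builds): compute the array on the cluster first, then write the resulting NumPy array locally with h5py, so nothing HDF5-related has to be serialized to the workers. File and dataset names match the repro; make() is the helper from the script above.

import h5py
from dask.distributed import Client

client = Client()
a = make(10, 2, True)          # `make` as defined in the script above
result = a.compute()           # computing on the cluster works, per the output above
# Write the materialized array locally instead of calling a.to_hdf5()
with h5py.File('test.hdf5', 'w') as f:
    f.create_dataset('/a', data=result)
client.close()

Note that this trades to_hdf5's chunked writing for a single in-memory array, so it is only viable when the computed result fits in local memory.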