无法将由延迟加锁加载创建的Dask数组保存到hdf5文件

时间:2018-08-06 20:25:30

标签: python dask

保存由带锁的延迟(delayed)加载函数生成的Dask数组会导致错误:

TypeError: can't pickle _thread._local objects

仅计算Dask数组不会导致错误。

测试:

import numpy as np
import dask
from dask.distributed import Client, Lock
import time
import dask.array as da

@dask.delayed
def locked_load(fn):
    """Load a .npy file while holding the distributed 'numpy-read' lock.

    Parameters
    ----------
    fn : str
        Path to the ``.npy`` file to load.

    Returns
    -------
    numpy.ndarray
        The array stored in *fn*.

    Notes
    -----
    The original version called ``lock.acquire()`` / ``lock.release()``
    manually, which leaks the lock if ``np.load`` raises. Using the lock
    as a context manager guarantees release on every exit path.
    """
    lock = Lock('numpy-read')
    with lock:  # distributed.Lock supports the context-manager protocol
        return np.load(fn)


def make(arr_size, n_parts, use_lock=True):
    """Build a stacked Dask array from ``n_parts`` lazily-loaded .npy files.

    Writes ``n_parts`` copies of ``arange(arr_size)`` to ``0.npy`` ..
    ``{n_parts-1}.npy`` in the current directory, then wraps each file in a
    delayed ``locked_load`` and stacks the results.

    Parameters
    ----------
    arr_size : int
        Number of elements in each 1-D part.
    n_parts : int
        Number of part files to write and stack.
    use_lock : bool, optional
        Unused; kept for backward compatibility with existing callers.
        # NOTE(review): locked_load always takes the lock regardless.

    Returns
    -------
    dask.array.Array
        A lazy ``(n_parts, arr_size)`` array.
    """
    # np.int is a deprecated alias for the builtin int (removed in NumPy
    # 1.24); use the builtin directly.
    x = np.arange(arr_size, dtype=int)
    for i in range(n_parts):
        np.save('%d.npy' % i, x)
    delayed_parts = [locked_load('%d.npy' % i) for i in range(n_parts)]
    chunks = [da.from_delayed(part, (arr_size,), int) for part in delayed_parts]
    return da.vstack(chunks)


def main():
    """Reproduce the report: compute() succeeds, to_hdf5() raises."""
    client = Client()
    arr = make(10, 2, True)
    # Plain computation of the lazy array completes without error.
    print(arr.compute())
    # Writing the same array to HDF5 triggers the pickling TypeError.
    arr.to_hdf5('test.hdf5', '/a')
    client.close()


# Run the reproduction only when executed as a script (required for
# distributed's Client, which spawns worker processes).
if __name__ == '__main__':
    main()

结果:

[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]
<...>/site-packages/h5py/__init__.py:36: FutureWarning: 
  Conversion of the second argument of issubdtype from `float` to 
  `np.floating` is deprecated. In future, it will be treated as 
  `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Traceback (most recent call last):
  File "<...>/site-packages/distributed/protocol/pickle.py", line 38, in dumps
    result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
TypeError: can't pickle _thread._local objects

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "lock5.py", line 34, in <module>
    main()
  File "lock5.py", line 29, in main
    a.to_hdf5('test.hdf5', '/a')
  File "<...>/site-packages/dask/array/core.py", line 1243, in to_hdf5
    return to_hdf5(filename, datapath, self, **kwargs)
  File "<...>/site-packages/dask/array/core.py", line 3783, in to_hdf5
    store(list(data.values()), dsets)
  File "<...>/site-packages/dask/array/core.py", line 961, in store
    result.compute(**kwargs)
  File "<...>/site-packages/dask/base.py", line 156, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "<...>/site-packages/dask/base.py", line 402, in compute
    results = schedule(dsk, keys, **kwargs)
  File "<...>/site-packages/distributed/client.py", line 1993, in get
    resources=resources)
  File "<...>/site-packages/distributed/client.py", line 1954, in _graph_to_futures
    'tasks': valmap(dumps_task, dsk3),
  File "cytoolz/dicttoolz.pyx", line 165, in cytoolz.dicttoolz.valmap (cytoolz/dicttoolz.c:3277)
  File "cytoolz/dicttoolz.pyx", line 190, in cytoolz.dicttoolz.valmap (cytoolz/dicttoolz.c:3124)
  File "<...>/site-packages/distributed/worker.py", line 703, in dumps_task
    'args': pickle.dumps(task[1:])}
  File "<...>/site-packages/distributed/protocol/pickle.py", line 51, in dumps
    return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
  File "<...>/site-packages/cloudpickle/cloudpickle.py", line 881, in dumps
    cp.dump(obj)
  File "<...>/site-packages/cloudpickle/cloudpickle.py", line 268, in dump
    return Pickler.dump(self, obj)
  File "<...>/python3.6/pickle.py", line 409, in dump
    self.save(obj)
  File "<...>/python3.6/pickle.py", line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File "<...>/python3.6/pickle.py", line 751, in save_tuple
    save(element)
  File "<...>/python3.6/pickle.py", line 521, in save
    self.save_reduce(obj=obj, *rv)
  File "<...>/python3.6/pickle.py", line 634, in save_reduce
    save(state)
  File "<...>/python3.6/pickle.py", line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File "<...>/python3.6/pickle.py", line 821, in save_dict
    self._batch_setitems(obj.items())
  File "<...>/python3.6/pickle.py", line 847, in _batch_setitems
    save(v)
  File "<...>/python3.6/pickle.py", line 496, in save
    rv = reduce(self.proto)
TypeError: can't pickle _thread._local objects

0 个答案:

没有答案