Dask distributed 与 numba 一起使用时报错

时间:2018-08-22 11:18:30

标签: python-2.7 dask numba dask-distributed

我正在尝试在 dask 中结合 numba,对数据集执行一个简单的 groupby 操作。在单机上它可以正常工作,但是当我把它应用到分布式系统时,却出现了一个我无法解决的错误。请帮忙,谢谢。
这是代码。

    import pandas as pd
    import time
    import dask as dask
    import dask.distributed as distributed
    import dask.dataframe as dd
    import dask.delayed as delayed
    from dask.distributed import Client,progress
    from numba import jit

    # NOTE(review): numba presumably cannot type a dask DataFrame, so @jit
    # likely adds no speed-up here (object-mode fallback on older numba, a
    # typing error on versions that default to nopython) -- confirm.
    @jit
    def group_sum(data_frame):
        """Return the per-group sum of ``col2``, grouped by ``col1``.

        Args:
            data_frame: Frame with ``col1`` and ``col2`` columns. The
                ``split_out`` keyword passed to ``sum`` is a dask option, so
                this is expected to be a dask DataFrame.

        Returns:
            The lazy grouped-sum result, with the aggregation asked to
            spread its output over 10 partitions (``split_out=10``).
        """
        return data_frame.groupby(['col1'])[['col2']].sum(split_out=10)

   # Connect to the distributed scheduler at the given address.
   client = Client('IP:PORT')
   # print(x) with a single argument behaves the same on Python 2.7 and 3.
   print(client)
   print(client.scheduler_info())

   # Read the CSV in chunks of `chunksize` rows and push each chunk to the
   # cluster; `f` collects the futures returned by scatter.
   f = []
   chunksize = 10 ** 6
   for chunk in pd.read_csv('file.csv', chunksize=chunksize):
       f_in = client.scatter(chunk)
       f.append(f_in)

   # Assemble the scattered chunks into one dask DataFrame and build the
   # (lazy) grouped sum on top of it.
   ddf = dd.from_delayed(f)
   ddf = group_sum(ddf)

   # Submit the computation to the cluster, show progress, and bring the
   # results back to the client process.
   c = ddf.to_delayed()
   future = client.compute(c)
   progress(future)
   result = client.gather(future)
   print(result)

错误回溯如下:

    f_in = client.scatter(chunk)
    File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1773, in scatter
asynchronous=asynchronous, hash=hash)
    File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 652, in sync
return sync(self.loop, func, *args, **kwargs)
    File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 275, in sync
six.reraise(*error[0])
    File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 260, in f
result[0] = yield make_coro()
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
   File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
   File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1641, in _scatter
timeout=timeout)
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
   File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
   File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 552, in send_recv_from_rpc
result = yield send_recv(comm=comm, op=key, **kwargs)
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
   File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1113, in run
yielded = self.gen.send(value)
   File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 446, in send_recv
six.reraise(*clean_exception(**response))
   File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 321, in handle_comm
result = yield result
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
   File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
   File "/usr/local/lib/python2.7/site-packages/distributed/scheduler.py", line 2155, in scatter
report=False)
   File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
  File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
  File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
  File "/usr/local/lib/python2.7/site-packages/distributed/utils_comm.py", line 128, in scatter_to_workers
for address, v in d.items()])
  File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
  File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
  File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
  File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 208, in All
result = yield tasks.next()
  File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
  File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
  File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
  File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 610, in send_recv_from_rpc
result = yield send_recv(comm=comm, op=key, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
  File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
  File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1113, in run
yielded = self.gen.send(value)
  File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 446, in send_recv
six.reraise(*clean_exception(**response))
  File "<string>", line 3, in reraise
  TypeError: raise: arg 3 must be a traceback or None

0 个答案:

没有答案