我正在尝试结合 dask 使用 numba,对数据集执行一个简单的 groupby 操作。在单机上它可以正常工作,但是当我把它应用到分布式系统时,它给出了一个我无法解决的错误回溯。请帮忙,谢谢。
这是代码。
import pandas as pd
import time
import dask as dask
import dask.distributed as distributed
import dask.dataframe as dd
import dask.delayed as delayed
from dask.distributed import Client,progress
from numba import jit
@jit
def group_sum(data_frame):
data_frame = data_frame.groupby(['col1'])[['col2']].sum(split_out=10)
return data_frame
# Connect to an already-running dask scheduler.
# NOTE(review): 'IP:PORT' looks like a placeholder for the real scheduler
# address -- confirm before running.
client = Client('IP:PORT')
# Single-argument print(...) behaves identically on Python 2 and Python 3,
# whereas the bare ``print x`` statement is a syntax error on Python 3.
print(client)
print(client.scheduler_info())

# Read the CSV on the client in chunks of ``chunksize`` rows and push each
# chunk to the cluster; ``f`` collects one future per scattered chunk.
# NOTE(review): the "raise: arg 3 must be a traceback or None" TypeError seen
# during scatter comes from re-raising a remote exception whose traceback
# could not be rebuilt locally, so it hides the real worker-side error.
# Mismatched package versions between client, scheduler and workers are a
# plausible cause -- verify with ``client.get_versions(check=True)``.
chunksize = 10 ** 6
f = [client.scatter(chunk)
     for chunk in pd.read_csv('file.csv', chunksize=chunksize)]

# Assemble the scattered chunks into one dask DataFrame and build the lazy
# groupby-sum on top of it.
ddf = dd.from_delayed(f)
ddf = group_sum(ddf)

# Submit every output partition, show progress, then collect the results
# back on the client.
c = ddf.to_delayed()
future = client.compute(c)
progress(future)
result = client.gather(future)
print(result)
错误回溯如下:
f_in = client.scatter(chunk)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1773, in scatter
asynchronous=asynchronous, hash=hash)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 652, in sync
return sync(self.loop, func, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 275, in sync
six.reraise(*error[0])
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 260, in f
result[0] = yield make_coro()
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1641, in _scatter
timeout=timeout)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 552, in send_recv_from_rpc
result = yield send_recv(comm=comm, op=key, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1113, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 446, in send_recv
six.reraise(*clean_exception(**response))
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 321, in handle_comm
result = yield result
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/scheduler.py", line 2155, in scatter
report=False)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/utils_comm.py", line 128, in scatter_to_workers
for address, v in d.items()])
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 208, in All
result = yield tasks.next()
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 610, in send_recv_from_rpc
result = yield send_recv(comm=comm, op=key, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1113, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 446, in send_recv
six.reraise(*clean_exception(**response))
File "<string>", line 3, in reraise
TypeError: raise: arg 3 must be a traceback or None