我尝试将topk例程与dask隔离开来。 它不知何故孤立地死去。
显然,numpy数组而不是dask数组在递归期间传递给x参数。
topk的原始源代码位于:https://github.com/dask/dask/blob/master/dask/array/routines.py
测试程序:
import numpy as np
import dask.array as da
from dask.base import tokenize
from operator import getitem
import dask.sharedict as sharedict
from dask.array.core import Array
def topk(k, x):
if x.ndim != 1:
raise ValueError("Topk only works on arrays of one dimension")
token = tokenize(k, x)
name = 'chunk.topk-' + token
dsk = {(name, i): (topk, k, key)
for i, key in enumerate(x.__dask_keys__())}
name2 = 'topk-' + token
dsk[(name2, 0)] = (getitem, (np.sort, (np.concatenate, list(dsk))),
slice(-1, -k - 1, -1))
chunks = ((k,),)
return Array(sharedict.merge((name2, dsk), x.dask), name2, chunks, dtype=x.dtype)
def main():
x = np.arange(12)*8
y = da.from_array(x, 7)
print(y.topk(2).compute())
print(topk(2, y).compute())
main()
错误:
File "test_dask_argtopk.py", line 40, in <module>
main()
File "test_dask_argtopk.py", line 38, in main
print(topk(2, y).compute())
File "test_dask_argtopk.py", line 27, in topk
for i, key in enumerate(x.__dask_keys__())}
AttributeError: 'Array' object has no attribute '__dask_keys__'