我有以下问题。我有一个像这样的范围列表:
# One (low, high) integer range per row of arr; both ends are inclusive.
parameterRanges2 = [(1,5),(1,5),(1,7),(1,7),(0,10),(1,20),(1,3),(0,1)]
我有一个像这样的numpy数组:
# 8 rows of 20 values each; row i is checked against parameterRanges2[i].
arr = np.array([[2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0],
[4.0,2.0,3.0,4.0,2.0,4.0,5.0,1.0,2.0,4.0,1.0,3.0,4.0,2.0,3.0,5.0,1.0,3.0,4.0,2.0],
[2.0,3.0,4.0,6.0,7.0,1.0,2.0,3.0,5.0,6.0,1.0,2.0,4.0,5.0,6.0,2.0,3.0,4.0,5.0,6.0],
[6.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,2.0,2.0,3.0,4.0,5.0],
[8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0],
[11.0,13.0,14.0,16.0,17.0,19.0,1.0,3.0,4.0,6.0,7.0,9.0,10.0,11.0,13.0,14.0,16.0,17.0,19.0,1.0],
[1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0],
[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0]])
列表中的每个范围元组都与numpy数组中对应的一行(子数组)一一对应。有没有办法检查每个范围内的所有整数是否都在对应的行中至少出现一次?例如,numpy数组的第一行应包含1,2,3,4,5中的每个数字至少一次,第二行同样如此,第三行则应包含1,2,3,4,5,6,7中的每个数字至少一次,依此类推。
答案 0 :(得分:2)
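利用范围都是整数这一点,我们可以给出一个O(nm)的解法,其中n×m是arr的形状。算法的工作原理如下:
1. 将每一行减去其范围的下界,使每个范围都从0开始;非整数或超出范围的值被映射到一个额外的“垃圾桶”列;
2. 使用np.add.at高效地(O(mn))统计范围内每个数字的出现次数(bincounts);
3. 检查范围内每个数字的计数是否都不小于min_occ(下面的示例中min_occ = 2,即要求每个数字至少出现两次;如果只要求至少出现一次,将其设为1即可)。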
import numpy as np

# One (low, high) integer range per row of arr; both ends are inclusive.
parameterRanges2 = np.array([(1,5),(1,5),(1,7),(1,7),(0,10),(1,20),(1,3),(0,1)])
arr = np.array([[2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0],
[4.0,2.0,3.0,4.0,2.0,4.0,5.0,1.0,2.0,4.0,1.0,3.0,4.0,2.0,3.0,5.0,1.0,3.0,4.0,2.0],
[2.0,3.0,4.0,6.0,7.0,1.0,2.0,3.0,5.0,6.0,1.0,2.0,4.0,5.0,6.0,2.0,3.0,4.0,5.0,6.0],
[6.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,2.0,2.0,3.0,4.0,5.0],
[8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0],
[11.0,13.0,14.0,16.0,17.0,19.0,1.0,3.0,4.0,6.0,7.0,9.0,10.0,11.0,13.0,14.0,16.0,17.0,19.0,1.0],
[1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0],
[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0]])

# Minimum number of times every integer of a row's range must occur in that
# row: 2 means "at least twice"; set it to 1 for "at least once".
min_occ = 2

# high - low for every range, kept as a column of shape (n_rows, 1) so it
# broadcasts against the rows of arr.
dp = np.diff(parameterRanges2, axis=-1)

# One row of counters per range: columns 0..max(dp) are the bins for the
# values low..high, the extra last column is a trash bin for everything else.
m = np.zeros((parameterRanges2.shape[0], np.max(dp) + 2), dtype=int)

# Shift every row so that its range starts at 0. The shifted copy gets its
# own name so the input data in arr is left untouched.
shifted = arr - parameterRanges2[:, :1]
ia = shifted.astype(int)

# Integral, in-range values keep their bin index; anything else is sent to
# index -1, i.e. the trash column.
idx = np.where((shifted == ia) & (ia >= 0) & (ia <= dp), ia, -1)

# Unbuffered in-place add, so repeated (row, bin) pairs are each counted.
np.add.at(m, (np.arange(parameterRanges2.shape[0])[:, None], idx), 1)

# A row passes when all dp + 1 bins of its range reached min_occ; the trash
# column is excluded from the check.
res = (m[:, :-1] >= min_occ).sum(axis=-1) == dp.ravel() + 1
print(res)
输出:
[ True  True False  True False False  True  True]
答案 1 :(得分:0)
可能存在更高效的纯Numpy写法,但下面的代码可以正常工作。我想不出简单的纯Numpy方法,因为各个范围的长度不同,无法把ranges构造成一个规整的标准Numpy数组。
import numpy as np

arr = np.array([
[2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0],
[4.0,2.0,3.0,4.0,2.0,4.0,5.0,1.0,2.0,4.0,1.0,3.0,4.0,2.0,3.0,5.0,1.0,3.0,4.0,2.0],
[2.0,3.0,4.0,6.0,7.0,1.0,2.0,3.0,5.0,6.0,1.0,2.0,4.0,5.0,6.0,2.0,3.0,4.0,5.0,6.0],
[6.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,2.0,2.0,3.0,4.0,5.0],
[8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0],
[11.0,13.0,14.0,16.0,17.0,19.0,1.0,3.0,4.0,6.0,7.0,9.0,10.0,11.0,13.0,14.0,16.0,17.0,19.0,1.0],
[1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0],
[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0]
])
# One (low, high) integer range per row of arr; both ends are inclusive.
parameterRanges2 = [(1,5),(1,5),(1,7),(1,7),(0,10),(1,20),(1,3),(0,1)]

# Expand every range into the float values the matching row must contain
# (v + 1 so that the upper bound itself is included).
ranges = [np.arange(u, v + 1, dtype='float64') for u, v in parameterRanges2]

# For each (range, row) pair, check that every required value occurs in the
# row. bool() turns NumPy's boolean scalar into a plain Python bool so the
# list prints as [True, ...] on every NumPy version (NumPy 2 would otherwise
# show np.True_ / np.False_).
result = [bool(np.all(np.isin(needed, row))) for needed, row in zip(ranges, arr)]
print(result)
输出:
[True, True, True, True, False, False, True, True]