Python:检查列表中的每个元素是否都出现了特定次数

时间:2017-11-21 13:00:03

标签: python arrays numpy scipy

我有以下问题。我有一个像这样的范围列表:

# One inclusive (low, high) integer range per row of the array below.
parameterRanges2 = [
    (1, 5),
    (1, 5),
    (1, 7),
    (1, 7),
    (0, 10),
    (1, 20),
    (1, 3),
    (0, 1),
]

我有一个像这样的numpy数组:

# 8 rows x 20 columns; row i is checked against the i-th range.
arr = np.array([
    [2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0],
    [4.0,2.0,3.0,4.0,2.0,4.0,5.0,1.0,2.0,4.0,1.0,3.0,4.0,2.0,3.0,5.0,1.0,3.0,4.0,2.0],
    [2.0,3.0,4.0,6.0,7.0,1.0,2.0,3.0,5.0,6.0,1.0,2.0,4.0,5.0,6.0,2.0,3.0,4.0,5.0,6.0],
    [6.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,2.0,2.0,3.0,4.0,5.0],
    [8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0],
    [11.0,13.0,14.0,16.0,17.0,19.0,1.0,3.0,4.0,6.0,7.0,9.0,10.0,11.0,13.0,14.0,16.0,17.0,19.0,1.0],
    [1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0],
])

列表中的每个parameterRange元组都与numpy数组中的一个子数组(一行)相对应。有没有办法检查相应范围内的所有整数是否都至少出现一次?例如,在numpy数组的第一个子数组中,数字1,2,3,4,5都至少出现一次;第二个子数组同样如此;在第三个子数组中,数字1,2,3,4,5,6,7都至少出现一次,依此类推。

2 个答案:

答案 0 :(得分:2)

利用范围边界都是整数这一点,我们可以给出一个O(nm)的解法,其中n×m是arr的形状。算法的工作原理如下:

  • 丢弃所有非int元素及其范围之外的所有元素
  • 使用np.add.at有效地(O(mn))生成范围内数字的bincounts
  • 统计每行中计数达到阈值(min_occ)的区间(bin)个数,并与该行范围的大小进行比较

import numpy as np

# Inclusive (low, high) integer range for each row of ``arr``.
parameterRanges2 = np.array([
    (1, 5), (1, 5), (1, 7), (1, 7), (0, 10), (1, 20), (1, 3), (0, 1),
])

arr = np.array([
    [2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0],
    [4.0,2.0,3.0,4.0,2.0,4.0,5.0,1.0,2.0,4.0,1.0,3.0,4.0,2.0,3.0,5.0,1.0,3.0,4.0,2.0],
    [2.0,3.0,4.0,6.0,7.0,1.0,2.0,3.0,5.0,6.0,1.0,2.0,4.0,5.0,6.0,2.0,3.0,4.0,5.0,6.0],
    [6.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,2.0,2.0,3.0,4.0,5.0],
    [8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0],
    [11.0,13.0,14.0,16.0,17.0,19.0,1.0,3.0,4.0,6.0,7.0,9.0,10.0,11.0,13.0,14.0,16.0,17.0,19.0,1.0],
    [1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0],
])

# Minimum number of times every integer of a range must occur in its row.
min_occ = 2

n_rows = parameterRanges2.shape[0]
lows = parameterRanges2[:, :1]

# Width (high - low) of each range, kept as a column so it broadcasts per row.
dp = parameterRanges2[:, 1:] - lows

# One counter per possible offset 0..max(dp), plus a final "dump" column
# that collects every element that is rejected below.
m = np.zeros((n_rows, dp.max() + 2), dtype=int)

# Shift each row so that its range starts at offset 0.
arr = arr - lows
ia = arr.astype(int)

# Keep only integral offsets lying inside the row's range; everything else
# is redirected to index -1, i.e. the dump column.
valid = (arr == ia) & (ia >= 0) & (ia <= dp)
idx = np.where(valid, ia, -1)

# Unbuffered accumulation: repeated (row, offset) pairs are each counted.
rows = np.arange(n_rows)[:, None]
np.add.at(m, (rows, idx), 1)

# A row passes when the number of offsets seen at least ``min_occ`` times
# (dump column excluded) equals the number of integers in its range.
res = (m[:, :-1] >= min_occ).sum(axis=1) == dp[:, 0] + 1
print(res)

输出(此处min_occ = 2,即要求范围内的每个数字至少出现两次;若只要求至少出现一次,请将min_occ设为1):

[ True  True False  True False False  True  True]

答案 1 :(得分:0)

使用Numpy函数可能有一种更有效的方法,但下面的代码是可行的。我想不出一个简单的纯Numpy方法,因为各个范围的长度不一样,无法把ranges组成一个标准的(矩形)Numpy数组。

import numpy as np

arr = np.array([
    [2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0,1.0,3.0,2.0,4.0,2.0,4.0,3.0,5.0],
    [4.0,2.0,3.0,4.0,2.0,4.0,5.0,1.0,2.0,4.0,1.0,3.0,4.0,2.0,3.0,5.0,1.0,3.0,4.0,2.0],
    [2.0,3.0,4.0,6.0,7.0,1.0,2.0,3.0,5.0,6.0,1.0,2.0,4.0,5.0,6.0,2.0,3.0,4.0,5.0,6.0],
    [6.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,2.0,2.0,3.0,4.0,5.0],
    [8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0],
    [11.0,13.0,14.0,16.0,17.0,19.0,1.0,3.0,4.0,6.0,7.0,9.0,10.0,11.0,13.0,14.0,16.0,17.0,19.0,1.0],
    [1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0]
])

# Inclusive (low, high) integer range for each row of ``arr``.
parameterRanges2 = [(1,5),(1,5),(1,7),(1,7),(0,10),(1,20),(1,3),(0,1)]

# For every range, the full list of values its row must contain; ``v+1``
# makes the upper bound inclusive. The lists differ in length, which is why
# they are kept as a Python list of arrays instead of one 2-D array.
ranges = [np.arange(u, v+1, dtype='float64') for u, v in parameterRanges2]

# A row passes when every required value is found in it at least once.
# bool(...) converts NumPy's np.bool_ to the builtin bool so the printed
# list reads [True, ...] on every NumPy version (NumPy >= 2 would otherwise
# print np.True_ / np.False_).
result = [
    bool(np.isin(required, row).all())
    for required, row in zip(ranges, arr)
]
print(result)

输出:

[True, True, True, True, False, False, True, True]