我想知道是否有更快的方法,来统计一个列表中有多少个值落在另一个列表各值所确定的范围(容差)之内。目前我使用 `bisect_left`,
但它要求事先用 `sorted` 对列表排序,
而这一步已被证明会降低代码的效率。有没有更快的解决方案?
代码是:
>>> import numpy as np
>>> from _bisect import bisect_left
>>> import time
>>> from itertools import izip
>>> list1 = [np.random.randn(200) for i in xrange(3)]
>>> list2 = [np.random.randn(80) for i in xrange(1000)]
def test(tol=0.2):
    """Time the bisect-based proximity count over the module-level data.

    For every array ``x`` in ``list1`` and every array ``y`` in ``list2``,
    count the elements of ``x`` that have at least one element of ``y``
    within ``tol``.  The per-``x`` counts are gathered and then discarded;
    only the elapsed wall-clock time, in seconds, is printed.

    Args:
        tol: Largest distance (inclusive) between an element of ``x`` and a
            neighbouring element of ``y`` for the element to be counted.
    """
    tt = time.time()
    # Sorting stays inside the timed region: it is part of the cost that
    # this benchmark is meant to expose.
    list2x = [sorted(y) for y in list2]
    for x in list1:
        nr = []
        for y in list2x:
            lj = len(y)
            nk = 0
            for xk in x:
                k = bisect_left(y, xk)
                # bisect_left guarantees y[k-1] < xk <= y[k], so only these
                # two neighbours can be the closest values to xk.
                if (k > 0 and xk - y[k - 1] <= tol) or (k < lj and y[k] - xk <= tol):
                    nk += 1
            nr.append(nk)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(time.time() - tt)
>>> for i in xrange(10): test()
0.786000013351
0.608999967575
0.546999931335
0.575000047684
0.629999876022
0.59299993515
0.546999931335
0.536000013351
0.575000047684
0.625
下面用一个示例说明我想要实现的效果:
>>> x1 = np.random.randn(10)
>>> x1
array([ 0.43165934, -0.11926191, 1.7257972 , -0.92569184, 1.37651344,
1.26895329, 0.89498147, 0.32699621, 0.11069324, -0.00309866])
>>> x2 = np.random.randn(20)
>>> x2
array([-0.65556355, -1.680542 , 1.07821383, -1.14390854, -1.13806358,
0.01698883, -1.44074658, 1.41329607, -0.92578986, -1.87628167,
-0.50014942, -0.02853544, 2.16239462, 0.50030708, 1.11585082,
-0.55437825, 0.47246503, 0.59957544, -1.59937406, -1.8100736 ])
>>> sum(any(abs(xk-xj)<=0.2 for xk in x2) for xj in x1)
9
也就是说,要统计 `x1` 中有多少个值,
落在列表 `x2` 中任意一个值的
[-0.2, 0.2]
容差范围之内。
答案 0 :(得分:1)
方法1:使用 `np.searchsorted`
——
# Sort the reference values once so that every lookup is a binary search.
x2s = np.sort(x2)  # or x2.sort() and use x2 instead of x2s
# Index of the right-hand neighbour of each x1 value, clamped to the last slot.
idx0 = np.minimum(np.searchsorted(x2s, x1), len(x2s) - 1)
# Index of the left-hand neighbour, clamped to the first slot.
idx1 = np.maximum(idx0 - 1, 0)
# An x1 value counts when either neighbour lies within the 0.2 tolerance.
out = (
    (np.abs(x2s[idx0] - x1) <= 0.2)
    | (np.abs(x1 - x2s[idx1]) <= 0.2)
).sum()
方法2:使用广播(broadcasting)
——
# Compare every x1 value with every x2 value (x1 as a column broadcasts against
# x2), then count the x1 rows that have at least one x2 value within 0.2.
(np.abs(x1[:, None] - x2) <= 0.2).any(axis=1).sum()
处理大数据时,
可以借助 `numexpr` 模块
利用多核(multi-core)进行计算 ——
import numexpr as ne

# Same broadcasted comparison as the NumPy version, evaluated by numexpr.
# x1e is x1 viewed as a column; x2 is not in local_dict, so numexpr has to
# resolve it from the calling scope.
ne.evaluate(
    'abs(x1e-x2)<=0.2',
    local_dict={'x1e': x1[:, None]},
).any(1).sum()