Question

我有两个带有时间戳（浮点值）的排序arrray，我想找到每个值的最接近的值，然后获得该值的索引。

这是我的代码到目前为止，但我正在寻找更快的东西。我还检查了其他解决方案，但到目前为止还没有更快......

t =包含所有时间戳的数组01

    for index, time in enumerate(timestamps_array02):   
        #find closest timestamp
        i = bisect_left(t, float (time))
        value = min(t[max(0, i-1): i+2], key=lambda t: abs(float(time) - float(t)))
        idx = np.where(t==value) #Getting the index
        x.append(gps[idx[0][0]])

Answer 1

np.searchsorted是二分的矢量化版本。在我的有限测试中，它提供与您的代码相同的结果，但速度要快得多：

m, n = 100, 100
OP                    1.39766530 ms
pp0                   0.00914090 ms
m, n = 2000, 100
OP                    2.14626830 ms
pp0                   0.01182020 ms
m, n = 100000, 10000
OP                  798.53475470 ms
pp0                   0.48502260 ms

代码：

import numpy as np
from bisect import bisect_left
from heapq import merge
from timeit import timeit
import types

def mock_data(m, n):
    t0 = np.cumsum(np.random.randint(1, 3*np.maximum(1, n//m), (m,)))
    t1 = np.cumsum(np.random.randint(1, 3*np.maximum(1, m//n), (n,)))
    return t0, t1

def f_OP(t, timestamps_array02):
    x = []
    for index, time in enumerate(timestamps_array02):   
        #find closest timestamp
        i = bisect_left(t, float (time))
        value = min(t[max(0, i-1): i+2], key=lambda t: abs(float(time) - float(t)))
        idx = np.where(t==value) #Getting the index
        x.append(idx[0][0])
    return x

def f_pp0(t0, t1):
    idx = t0.searchsorted(t1)
    over = idx.searchsorted(len(t0)-1)
    under = idx.searchsorted(0, 'right')
    idx[:under] = 1
    idx[over:] = len(t0)-1
    idx -= ((t1-t0[idx-1]) <= (t0[idx]-t1))
    return idx

for m, n in[(100, 100), (2000, 100), (100000, 10000)]:
    data = mock_data(m, n)
    ref = f_OP(*data)
    print(f'm, n = {m}, {n}')
    for name, func in list(globals().items()):
        if not name.startswith('f_') or not isinstance(func, types.FunctionType):
            continue
        try:
            assert np.all(ref==func(*data))
            print("{:16s}{:16.8f} ms".format(name[2:], timeit(
                'f(*data)', globals={'f':func, 'data':data}, number=10)*100))
        except:
            print("{:16s} apparently failed".format(name[2:]))

从Python中排序数组的最接近数字返回索引

1 个答案: