Question

我想使用 scipy.signal 库中的 savgol_filter 对在线数据进行滤波。但是当我把它用于在线数据（新元素逐个到来）时，我发现与处理离线数据（所有元素可以一次性参与计算）相比，savgol_filter 的输出存在一定的延迟（window_length // 2）。我使用了类似下面的代码：

from queue import Queue, Empty
import numpy as np
from scipy.signal import savgol_filter

# Savitzky-Golay window length; the polynomial order used below is 2.
window_size = 5

# Simulate an online source: samples are pulled from the queue one by one.
d = [2.22, 2.22, 5.55, 2.22, 1.11, 0.01, 1.11, 4.44, 9.99, 1.11, 3.33]
q = Queue()
for sample in d:
    q.put(sample)

data = []  # every sample received so far
res = []   # "online" output: one smoothed value per complete window

while not q.empty():
    data.append(q.get())
    if len(data) < window_size:
        # Not enough history yet to fill a whole window.
        continue
    # Filter only the newest window and keep its centre value, so each
    # output corresponds to the sample window_size // 2 positions back.
    latest_window = np.array(data[-window_size:])
    smoothed = savgol_filter(latest_window, window_size, 2)
    res.append(smoothed[window_size // 2])

# "Offline" reference: filter the complete series in a single call.
npd = np.array(data)
res2 = savgol_filter(npd, window_size, 2)

np.set_printoptions(precision=2)
print('source data ', npd)
print('online res  ', np.array(res))
print('offline res ', res2)

我的判断是对的吗？这个问题可以通过某种方式纠正吗？
如果我是对的，您能否推荐一种适用于此类在线计算的类似滤波器？

Answer 1

感谢您更新问题！

问题在于，您的 online res 方法缺少了一部分数据。scipy 的 savgol_filter 会处理边缘值，而您手动编写的版本没有处理。

对于您的示例，请查看两个结果：

'online res'：array([3.93, 3.17, 0.73, 0.2, 1.11, 5.87, 6.37])

'offline res'：array([1.84, 3.52, 3.93, 3.17, 0.73, 0.2, 1.11, 5.87, 6.37, 5.3, 1.84])

两者的中间部分是相同的，但 offline res 还额外处理了边缘值 data[0:2] 和 data[-2:]。在您的情况下，由于没有显式指定 mode，它会使用默认值 'interp'：

当选择 'interp' 模式（默认值）时，不对数据进行扩展。而是对边缘处的最后 window_length 个值拟合一个 polyorder 次多项式，并用该多项式来计算最后 window_length // 2 个输出值。

而这正是您的 online res 所缺少的处理。

我为两端各实现了一个简单的多项式拟合（polynomial fit），然后得到了完全相同的结果：

from queue import Queue, Empty
import numpy as np
from scipy.signal import savgol_filter

# Savitzky-Golay window length; polynomial order 2 is used throughout.
window_size = 5
centre = window_size // 2

# Simulate an online source: samples are pulled from the queue one by one.
d = [2.22, 2.22, 5.55, 2.22, 1.11, 0.01, 1.11, 4.44, 9.99, 1.11, 3.33]
q = Queue()
for sample in d:
    q.put(sample)

data = []  # every sample received so far
res = []   # online result, completed with edge values further below

while not q.empty():
    data.append(q.get())
    if len(data) >= window_size:
        # Smooth the newest full window and keep only its centre value.
        window = np.array(data[-window_size:])
        res.append(savgol_filter(window, window_size, 2)[centre])

# Leading edge: fit a quadratic to the first window (x = 0..4) and put its
# values at x = 0 and x = 1 in front of the centre values collected above.
head_fit = np.poly1d(np.polyfit(range(window_size), d[:window_size], deg=2))
res[:0] = [head_fit(0), head_fit(1)]

# Trailing edge: fit a quadratic to the last window with x running 4..0,
# so x = 0 is the newest sample; append the values at x = 1 and then x = 0.
tail_fit = np.poly1d(
    np.polyfit(range(window_size - 1, -1, -1), d[-window_size:], deg=2)
)
res.extend([tail_fit(1), tail_fit(0)])

# Offline reference: filter the complete series in a single call.
npd = np.array(data)
res2 = savgol_filter(npd, window_size, 2)

# Element-wise difference between the online and the offline result.
diff = np.asarray(res) - res2
np.set_printoptions(precision=2)
print('source data ', npd)
print('online res  ', np.array(res))
print('offline res ', res2)
print('error       ', diff.sum())

结果：

>>> Out: ('error       ', -7.9936057773011271e-15)

编辑：此版本不依赖于列表 d，这意味着它可以处理从数据源获取的任何数据。

# Streaming Savitzky-Golay smoothing that matches savgol_filter's default
# edge handling: after every new sample, `res` holds one value per received
# sample, with the newest half-window filled in by a polynomial fit and later
# overwritten once those samples become the centre of a full window.
window_size = 5
half_window_size = window_size // 2  # number of edge values on each side
poly_order = 2  # polynomial degree for the filter and for both edge fits
data = list()
q = Queue()
d = [2.22, 2.22, 5.55, 2.22, 1.11, 0.01, 1.11, 4.44, 9.99, 1.11, 3.33]
for i in d:
    q.put(i)
res = [None] * window_size  # slots for the first full window

# x positions for the trailing-edge fit, running window_size-1 .. 0 so that
# x == 0 is always the newest sample.
newest_first = range(window_size - 1, -1, -1)

while not q.empty():
    element = q.get()
    data.append(element)
    length = len(data)
    if length < window_size:
        # Nothing can be computed until the first window is complete.
        continue

    npd = np.array(data[-window_size:])

    if length == window_size:
        # Runs only once: fill the leading edge from a fit over the first
        # window (x = 0 .. window_size-1).
        p = np.poly1d(np.polyfit(range(window_size), npd, deg=poly_order))
        for poly_i in range(half_window_size):
            res[poly_i] = p(poly_i)
    else:
        res.append(None)  # add a slot for the newly arrived sample

    # The centre of the newest window is final: write the filtered value,
    # overwriting the extrapolated one stored there on an earlier step.
    res[(length - 1) - half_window_size] = savgol_filter(
        npd, window_size, poly_order
    )[half_window_size]

    # Provisional values for the newest half-window, taken from a fit over
    # the same window; these are replaced as more samples arrive.
    p = np.poly1d(np.polyfit(newest_first, npd, deg=poly_order))
    for poly_i_end in range(half_window_size):
        res[-poly_i_end - 1] = p(poly_i_end)

Answer 2

看起来卡尔曼滤波器（Kalman filter）系列能够实现我期望的效果。这是因为它们在"均方误差"意义下是最优的。例如，可以在此处（原文链接 here）找到一个实现。

使用scipy.signal库

2 个答案: