用移动窗口的正态分布替换nans

时间:2018-08-22 12:05:55

标签: python numpy random nan

我需要用numpy中的局部正态分布替换一维数组的NaN。我选择一个窗口,计算该窗口的均值和标准差,然后使用正态分布替换NaN,而其余信号保持不变。

import numpy as np

def replace_nan(signal, window = 5):
    """
    calculate moving average and std of signal window without nan    
    replaces nan values with normal distribution (mean, std)    
    """
# add padding in case signal starts/ends with nan
    signal = np.pad(signal, (window, window), 'mean', stat_length = 2*window)    

    for k in range(window,len(signal)-window):        
        mean = np.nanmean(signal[k-window:k+window])  # window average 
        std = np.nanstd(signal[k-window:k+window]) # window std without nan 

        ind = np.where(np.isnan(signal[k-window:k+window]))[0]    
        print (ind)   
        signal[ind]= np.random.normal(mean, std)

    signal = signal[window:len(signal)-window] #remove padding

    return signal

#tester 
signal = np.array([0.71034849, 0.17730998, 0.77577915, 0.38308111, 
0.24278947, np.nan, np.nan, 0.68694097, 0.6684736 , 0.47310845, 0.22210945, 
0.1189111, np.nan, np.nan, np.nan, 0.5573841 , 0.57531205, 0.74131346, 
0.29088101, 0.5573841 , 0.57531205, 0.74131346, np.nan, np.nan, np.nan, 
np.nan, 0.49534304, 0.18370482, 0.06089498, 0.22210945, 0.1189111])        

signal = replace_nan(signal, 5)

print(signal)

我用正态分布np.random.normal()将nans替换为均值为5的移动窗口计算出的均值和std,当我选择信号窗口的这些nans替换它们时,出了点问题。应该很容易,我只是python的完整入门者。

1 个答案:

答案 0 :(得分:3)

我还没有测试数字是否准确,但是我认为这会起作用:

import numpy as np

def replace_nan(signal, window = 5):
    """
    calculate moving average and std of signal window without nan
    replaces nan values with normal distribution (mean, std)
    """

    # add padding in case signal starts/ends with nan
    signal = np.pad(signal, (window, window), 'mean', stat_length = 2*window)

    for k in range(window, len(signal) - window + 1):
        mean = np.nanmean(signal[k-window:k+window])  # window average
        std = np.nanstd(signal[k-window:k+window]) # window std without nan

        if np.isnan(signal[k]):
            signal[k] = np.random.normal(mean, std)

    signal = signal[window:len(signal)-window] #remove padding

    return signal

#tester
signal = np.array(
    [
        0.71034849, 0.17730998, 0.77577915, 0.38308111, 0.24278947, np.nan,
        np.nan, 0.68694097, 0.6684736 , 0.47310845, 0.22210945, 0.1189111,
        np.nan, np.nan, np.nan, 0.5573841 , 0.57531205, 0.74131346,
        0.29088101, 0.5573841 , 0.57531205, 0.74131346, np.nan, np.nan,
        np.nan, np.nan, 0.49534304, 0.18370482, 0.06089498, 0.22210945,
        0.1189111
    ]
)

print("Before:")
print(signal)

signal = replace_nan(signal, 5)

print("\nAfter:")
print(signal)

这给出了:

Before:
[ 0.71034849  0.17730998  0.77577915  0.38308111  0.24278947         nan
         nan  0.68694097  0.6684736   0.47310845  0.22210945  0.1189111
         nan         nan         nan  0.5573841   0.57531205  0.74131346
  0.29088101  0.5573841   0.57531205  0.74131346         nan         nan
         nan         nan  0.49534304  0.18370482  0.06089498  0.22210945
  0.1189111 ]

After:
[ 0.71034849  0.17730998  0.77577915  0.38308111  0.24278947  0.35960417
  0.508657    0.68694097  0.6684736   0.47310845  0.22210945  0.1189111
  0.50282732  0.34906067  0.31206557  0.5573841   0.57531205  0.74131346
  0.29088101  0.5573841   0.57531205  0.74131346  0.80133879  0.63122315
  0.49236281  0.35630875  0.49534304  0.18370482  0.06089498  0.22210945
  0.1189111 ]