在Excel中查找和线性插入缺失数据

时间:2011-05-30 00:48:18

标签: excel excel-vba interpolation vba

我跳了一个人可以给我一个在Excel中线性插补缺失数据的手。

我有不同时间间隔收集的数据,为了操纵数据,我需要它排成行。我试图创建一个函数,在列中找到缺失的数据,并根据上面最接近的值和已经过的时间量的线性插值填充它。简单的线性插值适合填充缺失的数据。我有下面显示的示例数据(不幸的是我不会让我发布图像,所以下面的逗号描述数据)。对于开头或结尾的缺失数据,可以留空或复制最接近的值。

感谢您的帮助

示例数据(逗号描述):

Date(days),Time(hours),Data1,Data2,Data3,Data4,Data5,Data6
40684,0.385670139,,,2.049620821,,,
40684,0.385675069,,,,,,0.133139679
40684,0.385680012,,,8.550069731,,,
40684,0.385684954,,,,,,
40684,0.385689884,,,6.308237045,,,
40684,0.385694826,,,,0.012712923,,0.008131037
40684,0.385699769,,,7.00871559,,,
40684,0.385704699,6.153512677,,,0.00487698,,
40684,0.385709641,,12.45628511,5.053182136,,,
40684,0.385714572,,,,,,
40684,0.385719514,,3.908056327,5.79615269,,0.132418943,
40684,0.385724456,,,,,,
40684,0.385729387,,,1.100001911,,0.011925063,
40684,0.385734329,,,,,,
40684,0.385739271,,,4.576754454,,0.114426916,
40684,0.385744201,,,,,,
40684,0.385749144,,,6.64728305,,0.078670107,
40684,0.385754074,,,,,,
40684,0.385759016,,,3.666219416,,,
40684,0.385763958,,,,0.206991693,,
40684,0.385768889,,,7.619782896,,,
40684,0.385773831,,,,,,
40684,0.385778773,,,2.622500183,,,
40684,0.385783704,,,,0.136512025,,
40684,0.385788646,,,4.506286862,,,
40684,0.385793576,,,,,,0.028082778
40684,0.385798519,,,0.28245389,,,
40684,0.385803461,,,,,,0.161361483
40684,0.385808391,,,8.368688368,,,
40684,0.385813333,,,,,,0.001826172
40684,0.385818264,,,2.513474192,,,
40684,0.385823206,,,,,,0.068932257
40684,0.385828148,,,3.908795401,,,
40684,0.385833079,,,,,,
40684,0.385838021,,,6.855867864,,,
40684,0.385842963,,,,,,
40684,0.385847894,,,11.49078845,,,
40684,0.385852836,,,,,,
40684,0.385857766,,,8.035507345,,0.022647571,
40684,0.385862708,9.617499888,,,0.113758152,,
40684,0.385867639,,23.14163433,10.70361681,,0.095718193,
40684,0.385872581,,,,0.073685334,,
40684,0.385877523,,13.93635422,11.24775181,,,
40684,0.385882454,,,,,,

2 个答案:

答案 0 :(得分:1)

您需要的是使用线性插值或三次样条插值数据。您可以对齐x轴。

根据数据量,您需要使用VBA代码为您进行数学计算。不幸的是,从您提供的数据来看,似乎数据中存在大量噪声(或变化)(查看Data6),在短时间内发生了很大变化,大多数方案都会变得不稳定。你可能需要的是首先平滑数据(如果你有很多)然后插入它。

对于线性插值,工作表处于活动状态并运行以下

Option Explicit

Public Sub LinearFit()
Dim rx As Range, ry As Range
Dim x() As Variant, y() As Variant
Dim i As Integer, j As Integer, N As Integer, M As Integer
M = 6 'Columns
N = 44 'Rows
Set rx = Range("A2").Resize(N, 2) ' get x values (2 columns)
x = rx.Value    'Get Day/Time array from worksheet
Dim i_low, i_high As Integer
Dim x_low As Double, x_high As Double
Dim y_low As Double, y_high As Double
Dim x_temp As Double
For j = 1 To M 'go through all columns of data
    Set ry = Range("C2").Offset(0, j - 1).Resize(N, 1)
    y = ry.Value    'Get value array from worksheet
    i_low = 1: i_high = 0 'initialize search values
    Do  'loop starting empty values
        i_high = i_high + 1
    Loop While IsEmpty(y(i_high, 1)) And i_high < N
    'get x for first non-empty cell
    x_high = CDbl(x(i_high, 1)) + 24# * CDbl(x(i_high, 2))
    If IsEmpty(y(i_high, 1)) Then
        Exit For 'if column is empty leave it alone
    Else
        y_high = CDbl(y(i_high, 1)) ' get first non-zero value
    End If
    'Fill empty first cells with constant value
    For i = 1 To i_high - 1
        y(i, 1) = y_high 'fill empty cells with initial value
    Next i
    Do 'loop through sections of empty cells
        i_low = i_high
        x_low = x_high
        y_low = y_high
        'Find next empty block if it exists
        Do
            i_high = i_high + 1
        Loop While IsEmpty(y(i_high, 1)) And i_high < N
        'get x for next block end
        x_high = CDbl(x(i_high, 1)) + 24# * CDbl(x(i_high, 2))
        If IsEmpty(y(i_high, 1)) Then
            y_high = y_low 'if cells empty to end use last value
        Else
            y_high = CDbl(y(i_high, 1)) 'find next value
        End If
        For i = i_low To i_high
            ' x value of interest
            x_temp = CDbl(x(i, 1)) + 24# * CDbl(x(i, 2))
            ' Linear Interpolation (see wikipedia for details)
            y(i, 1) = y_low + (y_high - y_low) * (x_temp - x_low) / (x_high - x_low)
        Next i
    Loop While i_high < N
    ry.Value = y 'put array back into worksheet
Next j

End Sub

你会看到填写的值如下:

Date(days)  Time(hours) Data1   Data2   Data3   Data4   Data5   Data6
40684   0.385670139 6.153512677 12.45628511 2.049620821 0.012712923 0.132418943 0.133139679
40684   0.385675069 6.153512677 12.45628511 5.295565628 0.012712923 0.132418943 0.133139679
40684   0.385680012 6.153512677 12.45628511 8.550069731 0.012712923 0.132418943 0.101863791
40684   0.385684954 6.153512677 12.45628511 7.427790828 0.012712923 0.132418943 0.070594229
40684   0.385689884 6.153512677 12.45628511 6.308237045 0.012712923 0.132418943 0.039400597
40684   0.385694826 6.153512677 12.45628511 6.658440881 0.012712923 0.132418943 0.008131037
40684   0.385699769 6.153512677 12.45628511 7.00871559  0.008789793 0.132418943 0.009129735
40684   0.385704699 6.153512677 12.45628511 6.032137384 0.00487698  0.132418943 0.010125807
40684   0.385709641 6.261854761 12.45628511 5.053182136 0.021732662 0.132418943 0.011124303
40684   0.385714572 6.369955702 8.186932606 5.424253533 0.038550828 0.132418943 0.012120577

答案 1 :(得分:0)

我相信我已经想出了一种在单元格中完成它的方法。此公式将从第二行开始,在A列中显示时间,在B列中显示gappy数据,在列C中插入内插数据。请注意,但在最后几行中失败,因为可能没有更多值可供查找。

{= IF(ISBLANK(B2),C1 +(OFFSET(B2,IF(ISBLANK(B2),MATCH(1,IF(ISBLANK(B2:B $ 999),0,1),0),B2) ,0) - C1)/(OFFSET($ A2,IF(ISBLANK(B2),MATCH(1,IF(ISBLANK(B2:B $ 999),0,1),0),B2),0) - $ A1 )*($ A2 - $ A1),B2)}