分析数组中的数据,删除,然后重新分析VBA

时间:2017-06-07 19:29:26

标签: vba excel-vba excel

现在代码,我可以根据存储在LowConf和HighConf中的值来分析DataSet并检测异常值,这些值基于DataSet的大小。但是,我只能确定一个异常值。我想扩展代码或添加它,以便我可以执行以下操作:

  1. 首先确定最关键的元素
  2. 确定它是否是异常值,如果是,则从DataSet中删除该元素,如果不是,则结束该程序。
  3. 更改样本大小(减去1)并再次分析DataSet以获取下一个MOST外围元素并循环k次。
  4. 我的想法是创建一个新数组,它是来自均值的每个DataSet元素的距离(绝对值),然后找到它的UBound并在下面的If语句中分析它。我的问题是,一旦我确定它是否是异常值,我如何回到DataSet中的相应元素并删除它?如果是这样,有没有更简单的方法来解决这个问题?此外,我是编码的新手,所以有关可接受格式/如何清理代码的任何提示也很受欢迎。

    Sub CalculateOutliers()
    
        Dim n As Integer
        Dim mean As Double
        Dim SD As Double
        Dim k As Integer
        Dim DataSet As Variant
        Dim LowConf As Single
        Dim HighConf As Single
    
    
    '--------------------------------------------------------
        DataSet = Selection.Value
    'Copies highlighted data into DataSet variable
    'Cell A1 is (1,1) Because it starts at 0 which is out of range
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
        n = Selection.CountLarge
    'Counts number of entries
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
     'DEFINES 95(LowConf) AND 99(HighConf) PERCENT CONFIDENCES BASED ON
     'SAMPLE SIZE
    
            If n <= 5 Then
    
                LowConf = 1.72
                HighConf = 1.76
        End If
    
            If n = 6 Then
    
                LowConf = 1.89
                HighConf = 1.97
    
        End If
    
            If n = 7 Then
    
                LowConf = 2.02
                HighConf = 2.14
    
        End If
    
            If n = 8 Then
    
                LowConf = 2.13
                HighConf = 2.28
    
        End If
    
            If n = 9 Then
    
                LowConf = 2.21
                HighConf = 2.39
    
        End If
    
            If n = 10 Then
    
                LowConf = 2.29
                HighConf = 2.48
    
        End If
    
            If n = 11 Then
    
                LowConf = 2.36
                HighConf = 2.56
    
        End If
    
            If n = 12 Then
    
                LowConf = 2.41
                HighConf = 2.64
    
        End If
    
            If n = 13 Then
    
                LowConf = 2.46
                HighConf = 2.7
    
        End If
    
            If n = 14 Then
    
                LowConf = 2.51
                HighConf = 2.75
    
        End If
    
            If n = 15 Then
    
                LowConf = 2.55
                HighConf = 2.81
    
        End If
    
            If n = 16 Then
    
                LowConf = 2.59
                HighConf = 2.85
    
        End If
    
            If n = 17 Then
    
                LowConf = 2.62
                HighConf = 2.9
    
        End If
    
            If n = 18 Then
    
                LowConf = 2.65
                HighConf = 2.93
    
        End If
    
            If n = 19 Then
    
                LowConf = 2.68
                HighConf = 2.97
    
        End If
    
            If n = 20 Then
    
                LowConf = 2.71
                HighConf = 3
    
        End If
    
            If n = 21 Then
    
                LowConf = 2.73
                HighConf = 3.03
    
        End If
    
            If n = 22 Then
    
                LowConf = 2.76
                HighConf = 3.06
    
        End If
    
            If n = 23 Then
    
                LowConf = 2.78
                HighConf = 3.08
    
        End If
    
            If n = 24 Then
    
                LowConf = 2.8
                HighConf = 3.11
    
        End If
    
            If n = 25 Then
    
                LowConf = 2.82
                HighConf = 3.14
    
        End If
    
            If n = 26 Then
    
                LowConf = 2.84
                HighConf = 3.16
    
        End If
    
            If n = 27 Then
    
                LowConf = 2.86
                HighConf = 3.18
    
        End If
    
            If n = 28 Then
    
                LowConf = 2.88
                HighConf = 3.2
    
        End If
    
            If n = 29 Then
    
                LowConf = 2.89
                HighConf = 3.22
    
        End If
    
            If n = 30 Then
    
                LowConf = 2.91
                HighConf = 3.24
    
        End If
    
            If n <= 35 And n > 30 Then
    
                LowConf = 2.98
                HighConf = 3.32
    
        End If
    
            If n <= 40 And n > 35 Then
    
                LowConf = 3.04
                HighConf = 3.38
    
        End If
    
            If n <= 45 And n > 40 Then
    
                LowConf = 3.09
                HighConf = 3.44
    
        End If
    
            If n <= 50 And n > 45 Then
    
                LowConf = 3.13
                HighConf = 3.48
    
        End If
    
            If n <= 60 And n > 50 Then
    
                LowConf = 3.2
                HighConf = 3.56
    
        End If
    
            If n <= 70 And n > 60 Then
    
                LowConf = 3.26
                HighConf = 3.62
    
        End If
    
            If n <= 80 And n > 70 Then
    
                LowConf = 3.31
                HighConf = 3.67
    
        End If
    
            If n <= 90 And n > 80 Then
    
                LowConf = 3.35
                HighConf = 3.72
    
        End If
    
            If n <= 100 And n > 90 Then
    
                LowConf = 3.38
                HighConf = 3.75
    
        End If
    
            If n <= 150 And n > 100 Then
    
                LowConf = 3.52
                HighConf = 3.89
    
        End If
    
            If n <= 200 And n > 150 Then
    
                LowConf = 3.61
                HighConf = 3.98
    
        End If
    
            If n <= 300 And n > 200 Then
    
                LowConf = 3.72
                HighConf = 4.09
    
        End If
    
            If n <= 400 And n > 300 Then
    
                LowConf = 3.8
                HighConf = 4.17
    
        End If
    
            If n <= 500 And n > 400 Then
    
                LowConf = 3.86
                HighConf = 4.32
    
        End If
    
            If n > 500 Then
    
                MsgBox "Sample size cannot exceed 500."
    
        End If
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
            If n < 50 Then
    
                k = Int(n / 10)
    
            Else
    
                k = 5
    
        End If
    'determines k = number of possible outliers
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
        mean = Application.WorksheetFunction.Average(DataSet)
    'Calculates mean of Data Set
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
        SD = Application.WorksheetFunction.StDev(DataSet)
    'Calculates Standard Deviation of Data Set
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
    
    
            For Each element In DataSet
    
    
                If (Abs(element - mean) / SD) > LowConf Then
    
                    MsgBox "95% outlier: " & element
    
    
            End If
    
                If (Abs(element - mean) / SD) > HighConf Then
    
                    MsgBox "99% outlier: " & element
    
    
            End If
    
    
            Next element
    '--------------------------------------------------------
    
    
    End Sub
    

    更新:我已经找到了以下代码块。这个循环应该工作,但现在我只需要弄清楚如何从DataSet中删除Suspect。一旦删除它将循环回来并重新计算平均值和SD和可疑。我知道当UBound等于LBound时我没有代码,但这是我将在此之后处理的事情。使用此代码,Suspect持续显示值1.我不确定为什么这也是我确定DataSet.Remove(可疑)是否有效。

    Dim i As Long
    
        For i = 1 To k
    
    
    '--------------------------------------------------------
        mean = Application.WorksheetFunction.Average(DataSet)
    'Calculates mean of Data Set
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
        SD = Application.WorksheetFunction.StDev(DataSet)
    'Calculates Standard Deviation of Data Set
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
        Dim Suspect As Double
            If (Abs(UBound(DataSet) - mean)) > (Abs(LBound(DataSet) - mean)) Then
                Suspect = UBound(DataSet)
            End If
            If (Abs(UBound(DataSet) - mean)) < (Abs(LBound(DataSet) - mean)) Then
                Suspect = LBound(DataSet)
            End If
    'Defines what the most outlying value is
    '--------------------------------------------------------
    
    
    
    '--------------------------------------------------------
        Dim Retest As Boolean
    
                If (Abs(Suspect - mean) / SD) > LowConf Then
    
                    MsgBox "95% outlier: " & Suspect
                    Retest = True
    
            End If
    
                If (Abs(Suspect - mean) / SD) > HighConf Then
    
                    MsgBox "99% outlier: " & Suspect
                    Retest = True
    
            End If
    
                If Retest = True Then
                    DataSet.Remove (Suspect)
                End If
       MsgBox Suspect
    '--------------------------------------------------------
    Next i
    

    更新:我稍微更改了部分。我可以将其应用到最小但是如何指定可疑的位置并将其用作删除和升档的范围?

                            Dim Retest As Boolean
    
                            If (Abs(Suspect - mean) / SD) > LowConf Then
    
                                MsgBox "95% outlier: " & Suspect
                                Retest = True
    
                        End If
    
                            If (Abs(Suspect - mean) / SD) > HighConf Then
    
                                MsgBox "99% outlier: " & Suspect
                                Retest = True
    
                        End If
                        Dim pos As Range
    
                            Set pos = Application.Match(Suspect, DataSet)
                            MsgBox pos
                            If Retest = True And Suspect = Application.WorksheetFunction.Max(DataSet) Then
    
                               Range(pos).Delete Shift:=xlUp
    
                            End If
    

1 个答案:

答案 0 :(得分:2)

这解决了有关如何清理代码的问题部分。包括空格,你有近300行代码(确定配置级别),可以压缩到大约十几个左右。首先,定义一个函数:

Function ConfLevels(n As Long) As Variant
    Dim i As Long
    Dim cutpoints As Variant, lowconfs As Variant, highconfs As Variant

    cutpoints = Array(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 150, 200, 300, 400, 500)
    lowconfs = Array(1.72, 1.89, 2.02, 2.13, 2.21, 2.29, 2.36, 2.41, 2.46, 2.51, 2.55, 2.59, 2.62, 2.65, 2.68, 2.71, 2.73, 2.76, 2.78, 2.8, 2.82, 2.84, 2.86, 2.88, 2.89, 2.91, 2.98, 3.04, 3.09, 3.13, 3.2, 3.26, 3.31, 3.35, 3.38, 3.52, 3.61, 3.72, 3.8, 3.86)
    highconfs = Array(1.76, 1.97, 2.14, 2.28, 2.39, 2.48, 2.56, 2.64, 2.7, 2.75, 2.81, 2.85, 2.9, 2.93, 2.97, 3#, 3.03, 3.06, 3.08, 3.11, 3.14, 3.16, 3.18, 3.2, 3.22, 3.24, 3.32, 3.38, 3.44, 3.48, 3.56, 3.62, 3.67, 3.72, 3.75, 3.89, 3.98, 4.09, 4.17, 4.32)
    i = 0
    Do While n > cutpoints(i)
        i = i + 1
    Loop
    ConfLevels = Array(lowconfs(i), highconfs(i)) 
End Function

然后在main sub中用以下代码替换所有代码:

Dim levels As Variant 'in the declaration part, use a different name if you want

'if n > 500, handle error and exit sub
levels = ConfLevels(n)
LowConf = levels(0)
HignConf = levels(1)

您甚至可以跳过变量LowConfHighConf并直接使用levels(0)levels(1)

如果您解释了您使用的启发式,则可以通过使用T.Inv.2T等工作表函数来进一步缩短它。这些数字来自哪里?

就你的其余代码而言,一个合理的策略是编写一个递归函数,该函数被提供给VBA数组并返回一个VBA数组,其中删除了所有异常值(基本情况是返回数组不变的一个)因为没有更多的异常值要删除)。主要子(实际上与电子表格和用户交互)可能相对较小。