按照目前的代码,我可以根据存储在LowConf和HighConf中的值(这两个值取决于DataSet的大小)来分析DataSet并检测异常值。但是,我只能检测出一个异常值。我想扩展或补充这段代码,以便实现下面的做法:
我的想法是创建一个新数组,保存DataSet中每个元素到均值的距离(绝对值),然后找到它的UBound,并在下面的If语句中对它进行判断。我的问题是:一旦判断出它是异常值,我如何回到DataSet中对应的元素并将其删除?另外,有没有更简单的方法来解决这个问题?此外,我是编程新手,所以也欢迎任何关于规范格式或如何清理代码的建议。
Sub CalculateOutliers()
    ' Reports outliers among the currently selected cells.
    '
    ' For every numeric value in the selection the macro computes
    ' |value - mean| / SD and compares it with two critical values picked
    ' from a table by sample size: LowConf (95% level) and HighConf
    ' (99% level). Each value that exceeds a critical value is reported in
    ' a message box; a value above HighConf is therefore reported twice
    ' (once per level), exactly as before.
    '
    ' The macro shows a message and stops when the selection is not a
    ' range, holds fewer than two cells, or holds more than 500 cells.

    Dim n As Long
    Dim mean As Double
    Dim SD As Double
    Dim k As Integer
    Dim DataSet As Variant
    Dim LowConf As Single
    Dim HighConf As Single
    Dim element As Variant
    Dim deviation As Double
    Dim sizeLimits As Variant
    Dim lowTable As Variant
    Dim highTable As Variant
    Dim idx As Long

    ' Selection can be a chart or shape; only a Range has .Value/.CountLarge.
    If TypeName(Selection) <> "Range" Then
        MsgBox "Please select a range of cells first."
        Exit Sub
    End If

    ' Test CountLarge BEFORE storing it: a very large selection would
    ' overflow the counter variable and raise a run-time error instead of
    ' showing the intended message.
    If Selection.CountLarge > 500 Then
        MsgBox "Sample size cannot exceed 500."
        ' Stop here: without thresholds every value would be flagged.
        Exit Sub
    End If

    n = Selection.CountLarge

    ' A single cell yields a scalar (not an array) and StDev needs at
    ' least two points.
    If n < 2 Then
        MsgBox "Please select at least two values."
        Exit Sub
    End If

    ' Copy the highlighted data; the resulting array is 1-based, so cell
    ' A1 of the selection is DataSet(1, 1).
    DataSet = Selection.Value

    '--------------------------------------------------------
    ' 95% (LowConf) and 99% (HighConf) critical values by sample size.
    ' sizeLimits(i) is the largest n served by entry i; VBA.Array is used
    ' so the tables are zero-based regardless of any Option Base setting.
    sizeLimits = VBA.Array(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, _
                           15, 16, 17, 18, 19, 20, 21, 22, 23, 24, _
                           25, 26, 27, 28, 29, 30, 35, 40, 45, 50, _
                           60, 70, 80, 90, 100, 150, 200, 300, 400, 500)
    lowTable = VBA.Array(1.72, 1.89, 2.02, 2.13, 2.21, 2.29, 2.36, 2.41, 2.46, 2.51, _
                         2.55, 2.59, 2.62, 2.65, 2.68, 2.71, 2.73, 2.76, 2.78, 2.8, _
                         2.82, 2.84, 2.86, 2.88, 2.89, 2.91, 2.98, 3.04, 3.09, 3.13, _
                         3.2, 3.26, 3.31, 3.35, 3.38, 3.52, 3.61, 3.72, 3.8, 3.86)
    highTable = VBA.Array(1.76, 1.97, 2.14, 2.28, 2.39, 2.48, 2.56, 2.64, 2.7, 2.75, _
                          2.81, 2.85, 2.9, 2.93, 2.97, 3#, 3.03, 3.06, 3.08, 3.11, _
                          3.14, 3.16, 3.18, 3.2, 3.22, 3.24, 3.32, 3.38, 3.44, 3.48, _
                          3.56, 3.62, 3.67, 3.72, 3.75, 3.89, 3.98, 4.09, 4.17, 4.32)

    ' n <= 500 is guaranteed above, so this always stops inside the table.
    idx = 0
    Do While n > sizeLimits(idx)
        idx = idx + 1
    Loop
    LowConf = lowTable(idx)
    HighConf = highTable(idx)

    '--------------------------------------------------------
    ' k = number of possible outliers (one per ten values, at most five).
    ' It is not used by the single pass below; it is kept for the planned
    ' iterative remove-and-retest loop.
    If n < 50 Then
        k = Int(n / 10)
    Else
        k = 5
    End If

    '--------------------------------------------------------
    mean = Application.WorksheetFunction.Average(DataSet)
    SD = Application.WorksheetFunction.StDev(DataSet)

    ' All values identical: nothing can be an outlier, and dividing by a
    ' zero SD would raise a run-time error.
    If SD = 0 Then Exit Sub

    '--------------------------------------------------------
    For Each element In DataSet
        ' Skip blanks and text: they cannot be subtracted from the mean
        ' (IsNumeric alone is True for an empty cell).
        If IsNumeric(element) And Not IsEmpty(element) Then
            deviation = Abs(element - mean) / SD
            If deviation > LowConf Then
                MsgBox "95% outlier: " & element
            End If
            If deviation > HighConf Then
                MsgBox "99% outlier: " & element
            End If
        End If
    Next element
End Sub
更新:我已经写出了下面这段代码。这个循环应该可以工作,但现在我需要弄清楚如何从DataSet中删除Suspect。一旦删除,它将回到循环开头,重新计算均值、SD和Suspect。我知道目前还没有处理UBound等于LBound的情况,这是我之后要解决的问题。使用这段代码时,Suspect始终显示为1,我不确定原因,而我正是靠这个值来判断DataSet.Remove(Suspect)是否有效。
' Iterative outlier test: on each pass, recompute the mean and SD, pick the
' value farthest from the mean, test it against LowConf/HighConf, and if it
' is an outlier remove it from DataSet before the next pass.
' Relies on DataSet, mean, SD, k, LowConf and HighConf from the enclosing
' procedure.
Dim i As Long
Dim Suspect As Double
Dim Retest As Boolean
Dim largest As Double
Dim smallest As Double
Dim entry As Variant
Dim kept() As Variant
Dim total As Long
Dim keptCount As Long
Dim dropped As Boolean

For i = 1 To k
    '--------------------------------------------------------
    mean = Application.WorksheetFunction.Average(DataSet)
    SD = Application.WorksheetFunction.StDev(DataSet)

    ' All remaining values are equal: no outlier is possible and the
    ' division below would fail.
    If SD = 0 Then Exit For

    '--------------------------------------------------------
    ' The most outlying value is whichever extreme lies farther from the
    ' mean. UBound/LBound return array INDEXES (LBound is 1 here, which is
    ' why Suspect always showed 1); Max/Min return the actual values.
    largest = Application.WorksheetFunction.Max(DataSet)
    smallest = Application.WorksheetFunction.Min(DataSet)
    If Abs(largest - mean) >= Abs(smallest - mean) Then
        Suspect = largest      ' ties go to the maximum
    Else
        Suspect = smallest
    End If

    '--------------------------------------------------------
    ' Reset on every pass; otherwise one hit keeps Retest True forever.
    Retest = False
    If (Abs(Suspect - mean) / SD) > LowConf Then
        MsgBox "95% outlier: " & Suspect
        Retest = True
    End If
    If (Abs(Suspect - mean) / SD) > HighConf Then
        MsgBox "99% outlier: " & Suspect
        Retest = True
    End If

    If Retest Then
        ' A Variant array has no Remove method: rebuild the data as a
        ' one-dimensional array without the first occurrence of Suspect.
        total = 0
        For Each entry In DataSet
            total = total + 1
        Next entry

        ReDim kept(1 To total)
        keptCount = 0
        dropped = False
        For Each entry In DataSet
            If Not dropped And entry = Suspect Then
                dropped = True
            Else
                keptCount = keptCount + 1
                kept(keptCount) = entry
            End If
        Next entry
        ReDim Preserve kept(1 To keptCount)
        DataSet = kept
    End If

    MsgBox Suspect

    ' Nothing was removed, so another pass would repeat the same result.
    If Not Retest Then Exit For
    '--------------------------------------------------------
Next i
更新:我稍微修改了这一部分。同样的做法也可以用于最小值,但我如何确定Suspect所在的位置,并把它用作删除并上移(Shift:=xlUp)的范围?
' Tests Suspect against both critical values and, when it is an outlier and
' is the maximum of DataSet, deletes its cell from the worksheet.
' Relies on Suspect, mean, SD, DataSet, LowConf and HighConf from the
' enclosing procedure.
Dim Retest As Boolean
Retest = False
If (Abs(Suspect - mean) / SD) > LowConf Then
    MsgBox "95% outlier: " & Suspect
    Retest = True
End If
If (Abs(Suspect - mean) / SD) > HighConf Then
    MsgBox "99% outlier: " & Suspect
    Retest = True
End If

' Application.Match returns a position number (or an error value when there
' is no match), never a Range, so it is stored in a Variant without Set.
' The third argument 0 requests an exact match; the default is an
' approximate match that assumes sorted data.
Dim pos As Variant
pos = Application.Match(Suspect, DataSet, 0)

If Not IsError(pos) Then
    MsgBox pos
    If Retest = True And Suspect = Application.WorksheetFunction.Max(DataSet) Then
        ' NOTE(review): assumes DataSet still mirrors a single-column
        ' Selection, so position pos in the array is the pos-th selected
        ' cell - confirm against the caller.
        ' TODO: DataSet is not refreshed after the delete; reload it from
        ' the sheet before retesting.
        Selection.Cells(pos).Delete Shift:=xlUp
    End If
End If
答案 0 :(得分:2)
这里回答的是问题中关于如何清理代码的部分。算上空行,你用了近300行代码来确定置信水平,而它们可以压缩到十几行左右。首先,定义一个函数:
Function ConfLevels(n As Long) As Variant
    ' Returns the critical values for a sample of size n as a zero-based
    ' two-element array: element 0 is the 95% value, element 1 the 99% value.
    '
    ' cutpoints(i) is the largest sample size served by table entry i, so
    ' the first entry whose cutpoint is >= n is the one returned (any
    ' n <= 5 uses the first entry).
    '
    ' Raises run-time error 9 when n is larger than the last cutpoint (500).
    ' The error number is the same one the unchecked lookup produced; only
    ' the description is now specific.
    Dim i As Long
    Dim cutpoints As Variant, lowconfs As Variant, highconfs As Variant

    ' VBA.Array is always zero-based, even in a module with Option Base 1;
    ' the lookup below and the callers' levels(0)/levels(1) depend on that.
    cutpoints = VBA.Array(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 150, 200, 300, 400, 500)
    lowconfs = VBA.Array(1.72, 1.89, 2.02, 2.13, 2.21, 2.29, 2.36, 2.41, 2.46, 2.51, 2.55, 2.59, 2.62, 2.65, 2.68, 2.71, 2.73, 2.76, 2.78, 2.8, 2.82, 2.84, 2.86, 2.88, 2.89, 2.91, 2.98, 3.04, 3.09, 3.13, 3.2, 3.26, 3.31, 3.35, 3.38, 3.52, 3.61, 3.72, 3.8, 3.86)
    highconfs = VBA.Array(1.76, 1.97, 2.14, 2.28, 2.39, 2.48, 2.56, 2.64, 2.7, 2.75, 2.81, 2.85, 2.9, 2.93, 2.97, 3#, 3.03, 3.06, 3.08, 3.11, 3.14, 3.16, 3.18, 3.2, 3.22, 3.24, 3.32, 3.38, 3.44, 3.48, 3.56, 3.62, 3.67, 3.72, 3.75, 3.89, 3.98, 4.09, 4.17, 4.32)

    ' Fail with a clear message instead of walking past the end of the table.
    If n > cutpoints(UBound(cutpoints)) Then
        Err.Raise 9, "ConfLevels", _
            "Sample size " & n & " exceeds the largest tabulated size (" & _
            cutpoints(UBound(cutpoints)) & ")."
    End If

    i = 0
    Do While n > cutpoints(i)
        i = i + 1
    Loop
    ConfLevels = VBA.Array(lowconfs(i), highconfs(i))
End Function
然后在主过程(main sub)中,用以下代码替换那一大段确定置信水平的代码:
' Looks up both critical values for sample size n in one call.
Dim levels As Variant 'in the declaration part, use a different name if you want

' ConfLevels has no table entry above 500, so stop before calling it.
If n > 500 Then
    MsgBox "Sample size cannot exceed 500."
    Exit Sub
End If

' CLng passes a temporary Long, so this compiles whether n is declared as
' Integer or Long (ConfLevels takes its argument ByRef As Long).
levels = ConfLevels(CLng(n))
LowConf = levels(0)     ' 95% critical value
HighConf = levels(1)    ' 99% critical value
您甚至可以省去变量LowConf和HighConf,直接使用levels(0)和levels(1)。
如果您能说明所用的启发式方法,或许可以借助T.Inv.2T等工作表函数进一步缩短代码。这些数字来自哪里?
至于其余的代码,一个合理的策略是编写一个递归函数:它接收一个VBA数组,并返回一个删除了所有异常值的VBA数组(基本情况是没有更多异常值可删除,此时原样返回数组)。这样,真正与电子表格和用户交互的主过程就可以相对较小。