我有一个电子表格,我已经工作了一个多月来排序和优化坐标(有时超过100,000行),一旦我开始导入超过5,000个文件,它就 UNBEARABLY 慢行(在25,000行的数据集上完成计算和排序过程需要几个小时)。处理时间随着导入的坐标数呈指数增长。我已经研究过Stack Overflow以帮助我处理一些代码,并包含一些用于错误处理的安全网,如果没有数据则退出sub。
我用来实际排序坐标以查找最近邻居坐标并且我需要帮助的代码位于注释下
' Sort coordinates in Point List Data looking for shortest distance between points
,位于我的代码中的第58行第109行。
简单象限坐标(X,Y和Z)分别位于H,I和J列中,从第6行开始。命名范围为 PosXYZ ,此命名范围的公式为:
=INDEX(Optimizer!$H:$H, ROW(Optimizer!$H$5) + 1):INDEX(Optimizer!$L:$L, MATCH(bignum, Optimizer!$I:$I)).
bignum 定义为=1E+307*17.9769313486231
。
Column K
填充了毕达哥拉斯定理,以计算当前数据点X,Y与列表中先前数据点X,Y之间的距离。
Column L
填充了导入数据时创建的连续行号列表,以便可以使用单独的VBA代码恢复数据的原始排序顺序。
我试图查看使用数组是否会大大加快运行此点列表优化器所需的时间,并且我希望有人能够帮助我弄清楚如何让我的代码部分以指数级的速度运行。
我发现了以下类似问题,我想知道这种方法是否可用于帮助加快处理时间: How do you speed up VBA code with a named range?
我从这个网站上学到了很多东西,我希望有人有耐心和知识帮助我解决这个问题。我没有在VBA中使用数组的经验。
可以找到包含2904个数据点和VBA代码的示例Excel文件here。
Sub Optimize_PL()
' Add an error handler
On Error GoTo ErrorHandler
' Speed up sub-routine by turning off screen updating and auto calculating until the end of the sub-routine
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
' Declare variable names and types
Dim rInp As Range
Dim rTmp As Range
Dim i As Long
Dim n As Long
Dim sFrm As String
Dim PosX As String
Dim PosY As String
Dim PosZ As String
Dim SortOrder As String
Dim LastRow As Long
Dim hLastRow As Long
Dim lLastRow As Long
' Find number of populated cells in Column H and Column L (not including the 5 column header rows)
hLastRow = Cells(Rows.Count, "H").End(xlUp).Row - 5
lLastRow = Cells(Rows.Count, "L").End(xlUp).Row - 5
' Check for existing Point List Data to avoid error
If hLastRow < 2 Then
MsgBox "Not enough data points are available to optimize." & vbNewLine & _
"" & vbNewLine & _
"Column H populated rows: " & hLastRow, vbInformation, "Error Message"
GoTo ErrorHandler
ElseIf lLastRow < 2 Then
MsgBox "Original sort order row numbers not available in Column L," & vbNewLine & _
"" & vbNewLine & _
"Original sort order canot be restored without Row # data." & vbNewLine & _
"Column L populated rows: " & lLastRow, vbInformation, "Error Message"
Err.Number = 0
GoTo ErrorHandler
ElseIf hLastRow <> lLastRow Then
MsgBox "The number of rows of coordinate data does not match the" & vbNewLine & _
"number of rows in the Row # column. There is no way to" & vbNewLine & _
"restore the original sort order." & vbNewLine & _
"" & vbNewLine & _
"Column H populated rows: " & hLastRow & vbNewLine & _
"Column L populated rows: " & lLastRow, vbInformation, "Error Message"
Err.Number = 0
GoTo ErrorHandler
End If
' Timer Start (calculate the length of time this VBA code takes to complete)
StartTime = Timer
' Sort coordinates in Point List Data looking for shortest distance between points
Set rInp = Range("PosXYZ").Resize(, 4)
n = rInp.Rows.Count
i = 0
For i = 1 To n - 1
Application.StatusBar = i + 1 & " of " & n & " Calculating for " & SecondsElapsed & " seconds" & " Estimated Time Remaining: " & TimeRemaining & " seconds"
SecondsElapsed = Round(Timer - StartTime) ' Change to StartTime, 2) to display seconds two decimal places out
TimeRemaining = Round((SecondsElapsed / (i + 1)) * (n - (i + 1))) ' Change to i + 1)),2) to display seconds two decimal places out
Set rTmp = rInp.Offset(i).Resize(n - i, 5)
With rTmp
PosX = .Cells(0, 1).Address(ReferenceStyle:=xlR1C1)
PosY = .Cells(0, 2).Address(ReferenceStyle:=xlR1C1)
PosZ = .Cells(0, 3).Address(ReferenceStyle:=xlR1C1)
SortOrder = .Cells(0, 5).Address(ReferenceStyle:=xlR1C1)
sFrm = Replace(Replace(Replace(Replace("=SQRT((RC[-3] - PosX)^2 + (RC[-2] - PosY)^2)", "PosX", PosX), "PosY", PosY), "PosZ", PosZ), "SortOrder", SortOrder)
sFrm = Replace(Replace(Replace(Replace(sFrm, "PosX", PosX), "PosY", PosY), "PosZ", PosZ), "SortOrder", SortOrder)
.Columns(4).FormulaR1C1 = sFrm
.Sort Key1:=.Range("D1"), Header:=xlNo
End With
Next i
' Timer Stop (calculate the length of time this VBA code took to complete)
SecondsElapsed = Round(Timer - StartTime, 2)
' Turn screen updating and auto calculating back on since file processing is now complete
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
' Message to report VBA code processing time after file selection and number of data rows imported
MsgBox "Calculated optimized travel path between coordinates in " & vbNewLine & _
"" & vbNewLine & _
" " & SecondsElapsed & " seconds"
' Reset to defaults in the event of a processing error during the sub-routine execution
ErrorHandler:
Application.StatusBar = True
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
If Err.Number <> 0 Then
' Display a message to the user including the error code in the event of an error during execution
MsgBox "An error number " & Err.Number & " was encountered!" & vbNewLine & _
"Part or all of this VBA code was not completed.", vbInformation, "Error Message"
End If
End Sub
答案 0 :(得分:6)
是的,你可以使用数组加速这段代码:下面的代码快了大约20倍。
Sub Optimize_PL2()
' Add an error handler
On Error GoTo ErrorHandler
' Speed up sub-routine by turning off screen updating and auto calculating until the end of the sub-routine
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
' Define variable names and types
Dim i As Long
Dim j As Long
Dim k As Long
Dim hLastRow As Long
Dim lLastRow As Long
Dim varData As Variant
Dim dData() As Double
Dim dResult() As Double
Dim jRow() As Long
Dim dThisDist As Double
Dim dSmallDist As Double
Dim jSmallRow As Long
' Find number of populated cells in Column H and Column L (not including the 5 column header rows)
hLastRow = Cells(Rows.Count, "H").End(xlUp).Row - 5
lLastRow = Cells(Rows.Count, "L").End(xlUp).Row - 5
' Check for existing Point List Data to avoid error
If hLastRow < 2 Then
MsgBox "Not enough data points are available to optimize." & vbNewLine & _
"" & vbNewLine & _
"Column H populated rows: " & hLastRow, vbInformation, "Error Message"
GoTo ErrorHandler
ElseIf lLastRow < 2 Then
MsgBox "Original sort order row numbers not available in Column L," & vbNewLine & _
"" & vbNewLine & _
"Original sort order canot be restored without Row # data." & vbNewLine & _
"Column L populated rows: " & lLastRow, vbInformation, "Error Message"
Err.Number = 0
GoTo ErrorHandler
ElseIf hLastRow <> lLastRow Then
MsgBox "The number of rows of coordinate data does not match the" & vbNewLine & _
"number of rows in the Row # column. There is no way to" & vbNewLine & _
"restore the original sort order." & vbNewLine & _
"" & vbNewLine & _
"Column H populated rows: " & hLastRow & vbNewLine & _
"Column L populated rows: " & lLastRow, vbInformation, "Error Message"
Err.Number = 0
GoTo ErrorHandler
End If
On Error GoTo 0
' Timer Start (calculate the length of time this VBA code takes to complete)
StartTime = Timer
varData = Worksheets("Optimizer").Range("H6").Resize(hLastRow, 5).Value2
ReDim dResult(1 To hLastRow, 1 To 5) As Double
ReDim dData(1 To hLastRow, 1 To 5) As Double
'
' copy vardata into data coercing to double
' (repeated arithmetic is faster on doubles than variants)
'
For j = LBound(varData) To UBound(varData)
For k = LBound(varData, 2) To UBound(varData, 2)
dData(j, k) = CDbl(varData(j, k))
If j = 1 Then
dResult(j, k) = dData(j, k)
End If
Next k
Next j
'
' look for shortest distance row
'
For i = LBound(dResult) To UBound(dResult) - 1
'
' calc distance from this row to all remaining rows and find shortest
'
jSmallRow = -1
dSmallDist = 1 * 10 ^ 307
For j = 2 To UBound(dData)
If dData(j, 3) > -1 And j <> i Then
dThisDist = Sqr((dResult(i, 1) - dData(j, 1)) ^ 2 + (dResult(i, 2) - dData(j, 2)) ^ 2)
If dThisDist < dSmallDist Then
jSmallRow = j
dSmallDist = dThisDist
End If
End If
Next j
'
' copy jsmallrow row to i+1
'
If jSmallRow > -1 Then
For k = 1 To 2
dResult(i + 1, k) = dData(jSmallRow, k)
Next k
dResult(i + 1, 4) = dSmallDist
dResult(i + 1, 5) = jSmallRow
'
' set smallrow so it does not get used again
'
dData(jSmallRow, 3) = -1
End If
Next i
'
' put data back on sheet
'
Worksheets("Optimizer").Range("H6").Resize(hLastRow, 5).Value2 = dResult
' Timer Stop (calculate the length of time this VBA code took to complete)
SecondsElapsed = Round(Timer - StartTime, 2)
' Turn screen updating and auto calculating back on since file processing is now complete
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
' Message to report VBA code processing time after file selection and number of data rows imported
MsgBox "Calculated optimized travel path between coordinates in " & vbNewLine & _
"" & vbNewLine & _
" " & SecondsElapsed & " seconds"
' Reset to defaults in the event of a processing error during the sub-routine execution
ErrorHandler:
Application.StatusBar = True
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
If Err.Number <> 0 Then
' Display a message to the user including the error code in the event of an error during execution
MsgBox "An error number " & Err.Number & " was encountered!" & vbNewLine & _
"Part or all of this VBA code was not completed.", vbInformation, "Error Message"
End If
End Sub
答案 1 :(得分:3)
除了切换到不同的算法(例如k-d树)之外,以下是一些可以加速代码的事情:
更新的代码:
Const HeaderRow = 5
Set rInp = Range(Cells(HeaderRow + 1, 8), Cells(hLastRow, 11))
n = rInp.Rows.Count
For i = 1 To n - 1
If i Mod 100 = 0 Then
Application.StatusBar = i + 1 & " of " & n & " Calculating for " & SecondsElapsed & " seconds" & " Estimated Time Remaining: " & TimeRemaining & " seconds"
SecondsElapsed = Round(Timer - StartTime) ' Change to StartTime, 2) to display seconds two decimal places out
TimeRemaining = Round((SecondsElapsed / (i + 1)) * (n - (i + 1))) ' Change to i + 1)),2) to display seconds two decimal places out
End If
Set rTmp = rInp.Offset(i).Resize(n - i, 5)
With rTmp
Dim TargetRow As Long
TargetRow = HeaderRow + i
sFrm = "=SQRT((RC[-3] - R" & TargetRow & "C[-3])^2 + (RC[-2] - R" & TargetRow & "C[-2])^2)"
With .Columns(4)
.FormulaR1C1 = sFrm
.Calculate
.Value = .Value
End With
.Sort Key1:=.Range("D1"), Header:=xlNo
End With
Next i