我正在比较工作簿中的工作表。该工作簿有两个名为PRE和POST的工作表,每个工作表中有19个相同的列。行数每天都不同,但在特定日期的两张纸上相同。宏将PRE工作表中的每一行与POST工作表中的相应行进行比较,如果它们相同,则删除两个工作表中的行。
我有通常建议的提高性能的方法,如屏幕更新设置为FALSE等。
我想优化两个FOR NEXT
循环。
Dim RESULT As String
iPRE = ActiveWorkbook.Worksheets("PRE").Range("A1", Worksheets("PRE").Range("A1").End(xlDown)).Rows.Count
'MsgBox iPRE
iPOST = ActiveWorkbook.Worksheets("POST").Range("A1", Worksheets("POST").Range("A1").End(xlDown)).Rows.Count
'MsgBox iPOST
If iPRE <> iPOST Then
MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
Exit Sub
Else
iRows = iPRE
End If
'Optimize Performance
Application.ScreenUpdating = False
EventState = Application.EnableEvents
Application.EnableEvents = False
CalcState = Application.Calculation
Application.Calculation = xlCalculationManual
PageBreakState = ActiveSheet.DisplayPageBreaks
ActiveSheet.DisplayPageBreaks = False
For iCntr = iRows To 2 Step -1
For y = 1 To 20
If Worksheets("PRE").Cells(iCntr, y) <> Worksheets("POST").Cells(iCntr, y) Then
RESULT = "DeleteN"
Exit For
Else
RESULT = "DeleteY"
End If
Next y
If RESULT = "DeleteY" Then
Worksheets("PRE").Rows(iCntr).Delete
Worksheets("POST").Rows(iCntr).Delete
End If
Next iCntr
'Revert optmizing lines
ActiveSheet.DisplayPageBreaks = PageBreakState
Application.Calculation = CalcState
Application.EnableEvents = EventState
Application.ScreenUpdating = True
End Sub
答案 0 :(得分:4)
对工作表单元格的任何引用都很慢。当你在循环中执行它时,这会大大增加。最好的速度增加将来自限制这些工作表参考。
一种好方法是复制Variant Arrays中的数据,并循环遍历这些数据,构建一个包含要保留的数据的新Variant数组。然后将新阵列一次性放在旧的阵列上。
使用200,000行,20列,50%文本,50%数字,删除170,000行的测试数据集:此代码在我的硬件上运行大约30秒
Sub Mine2()
Dim T1 As Long, T2 As Long, T3 As Long
Dim ResDelete As Boolean
Dim iPRE As Long, iPOST As Long
Dim EventState As Boolean, CalcState As XlCalculation, PageBreakState As Boolean
Dim iCntr As Long, y As Long, iRows As Long
Dim rPre As Range, rPost As Range
Dim PreDat As Variant, PostDat As Variant, PreDelDat As Variant, PostDelDat As Variant
Dim n As Long
Dim wsPre As Worksheet, wsPost As Worksheet
Set wsPre = ActiveWorkbook.Worksheets("PRE")
With wsPre
Set rPre = .Range(.Cells(1, .Columns.Count).End(xlToLeft), .Cells(.Rows.Count, 1).End(xlUp))
PreDat = rPre.Value
iPRE = UBound(PreDat, 1)
'MsgBox iPRE
End With
Set wsPost = ActiveWorkbook.Worksheets("POST")
With wsPost
Set rPost = .Range(.Cells(1, .Columns.Count).End(xlToLeft), .Cells(.Rows.Count, 1).End(xlUp))
PostDat = rPost.Value
iPOST = UBound(PostDat, 1)
'MsgBox iPOST
End With
If iPRE <> iPOST Then
MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
Exit Sub
End If
iRows = iPRE
ReDim PreDelDat(1 To UBound(PreDat, 1), 1 To UBound(PreDat, 2))
ReDim PostDelDat(1 To UBound(PostDat, 1), 1 To UBound(PostDat, 2))
n = 1
On Error GoTo EH:
'Optimize Performance
Application.ScreenUpdating = False
EventState = Application.EnableEvents
Application.EnableEvents = False
CalcState = Application.Calculation
Application.Calculation = xlCalculationManual
PageBreakState = ActiveSheet.DisplayPageBreaks
ActiveSheet.DisplayPageBreaks = False
T1 = GetTickCount
For y = 1 To UBound(PreDat, 2)
PreDelDat(1, y) = PreDat(1, y)
PostDelDat(1, y) = PostDat(1, y)
Next
n = 2
For iCntr = 2 To UBound(PreDat, 1)
ResDelete = True
For y = 1 To UBound(PreDat, 2)
If PreDat(iCntr, y) <> PostDat(iCntr, y) Then
ResDelete = False
Exit For
End If
Next y
If Not ResDelete Then
For y = 1 To UBound(PreDat, 2)
PreDelDat(n, y) = PreDat(iCntr, y)
PostDelDat(n, y) = PostDat(iCntr, y)
Next
n = n + 1
End If
Next iCntr
T2 = GetTickCount
Debug.Print "Compare Done in:", T2 - T1
Debug.Print "Rows to delete:", n - 1
rPre = PreDelDat
rPost = PostDelDat
T3 = GetTickCount
Debug.Print "Delete Done In:", T3 - T1
CleanUp:
'Revert optmizing lines
On Error Resume Next
ActiveSheet.DisplayPageBreaks = PageBreakState
Application.Calculation = CalcState
Application.EnableEvents = EventState
Application.ScreenUpdating = True
Exit Sub
EH:
' Handle Errors here
Debug.Assert False
Resume
Err.Clear
Resume CleanUp
End Sub
原件:
一种好方法是复制Variant Arrays中的数据,然后循环遍历这些数据,构建对要删除的单元格的引用。然后一次性删除。
其他一般提示:
End(xlUp)
以避免意外空白失败(除非您希望停在第一个空白处)重构代码:
Sub Demo()
Dim ResDelete As Boolean
Dim iPRE As Long, iPOST As Long
Dim EventState As Boolean, CalcState As XlCalculation, PageBreakState As Boolean
Dim iCntr As Long, y As Long, iRows As Long
Dim rPreDelete As Range, rPostDelete As Range
Dim PreDat As Variant, PostDat As Variant
With ActiveWorkbook.Worksheets("PRE")
PreDat = .Range(.Cells(1, 20), .Cells(.Rows.Count, 1).End(xlUp)).Value
iPRE = UBound(PreDat, 1)
'MsgBox iPRE
End With
With ActiveWorkbook.Worksheets("POST")
PostDat = .Range(.Cells(1, 20), .Cells(.Rows.Count, 1).End(xlUp)).Value
iPOST = UBound(PostDat, 1)
'MsgBox iPOST
End With
If iPRE <> iPOST Then
MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
Exit Sub
End If
iRows = iPRE
On Error GoTo EH:
'Optimize Performance
Application.ScreenUpdating = False
EventState = Application.EnableEvents
Application.EnableEvents = False
CalcState = Application.Calculation
Application.Calculation = xlCalculationManual
PageBreakState = ActiveSheet.DisplayPageBreaks
ActiveSheet.DisplayPageBreaks = False
For iCntr = 2 To UBound(PreDat, 1)
ResDelete = True
For y = 1 To 20
If PreDat(iCntr, y) <> PostDat(iCntr, y) Then
ResDelete = False
Exit For
End If
Next y
If ResDelete Then
If rPreDelete Is Nothing Then
Set rPreDelete = Worksheets("PRE").Rows(iCntr)
Set rPostDelete = Worksheets("POST").Rows(iCntr)
Else
Set rPreDelete = Application.Union(rPreDelete, Worksheets("PRE").Rows(iCntr))
Set rPostDelete = Application.Union(rPostDelete, Worksheets("POST").Rows(iCntr))
End If
End If
Next iCntr
If Not rPreDelete Is Nothing Then
rPreDelete.Delete
rPostDelete.Delete
End If
CleanUp:
'Revert optmizing lines
On Error Resume Next
ActiveSheet.DisplayPageBreaks = PageBreakState
Application.Calculation = CalcState
Application.EnableEvents = EventState
Application.ScreenUpdating = True
Exit Sub
EH:
' Handle Errors here
Resume CleanUp
End Sub
答案 1 :(得分:2)
如果我可以把我的两分钱,这是我的建议。
我测试了原始代码(唯一的更改是For y = 1 to 10
而不是For y = 1 to 20
)和我的代码针对2张10列和(最初500,000)250,000行数据每。我使用10而不是20的原因在于我不知道列中的数据是什么,作为替代,我使用了1或2的随机值。
2^10 = 1,024
种可能性。2^20 = 1,048,576
种可能性。由于我想在每个表中至少有几行相等的可能性,我选择了10列方案。
为宏计时我设置了一个定时器宏,它调用宏来比较和删除数据。
为了能够比较结果,在启动Excel并使用完全相同的数据打开文件后,两个宏都直接执行。
我有
Active
Sub CompareAndDelete()
Dim WsPre As Worksheet, WsPost As Worksheet
Dim Row As Long, Column As Long
Dim ArrPre() As Variant, ArrPost() As Variant
Dim DeleteRow As Boolean
Dim DeletePre As Range, DeletePost As Range
With Application
.ScreenUpdating = False
.Calculation = xlCalculationManual
.EnableEvents = False
End With
With ThisWorkbook
Set WsPre = .Worksheets("PRE")
Set WsPost = .Worksheets("Post")
End With
ArrPre = WsPre.Range(WsPre.Cells(1, 1), WsPre.Cells(WsPre.Cells(WsPre.Rows.Count, 1).End(xlUp).Row, 20))
ArrPost = WsPost.Range(WsPost.Cells(1, 1), WsPost.Cells(WsPost.Cells(WsPost.Rows.Count, 1).End(xlUp).Row, 20))
If Not UBound(ArrPre, 1) = UBound(ArrPost, 1) Then
MsgBox "Unequal number of rows in sheets PRE and POST. Exiting macro.", vbCritical, "Unequal sheets"
Else
For Row = 2 To UBound(ArrPre, 1)
DeleteRow = True
For Column = 1 To UBound(ArrPre, 2)
If Not ArrPre(Row, Column) = ArrPost(Row, Column) Then
DeleteRow = False
Exit For
End If
Next Column
If DeleteRow = True Then
If DeletePre Is Nothing Then
Set DeletePre = WsPre.Rows(Row)
Set DeletePost = WsPost.Rows(Row)
Else
Set DeletePre = Union(DeletePre, WsPre.Rows(Row))
Set DeletePost = Union(DeletePost, WsPost.Rows(Row))
End If
End If
Next Row
If Not DeletePre Is Nothing Then DeletePre.Delete
If Not DeletePost Is Nothing Then DeletePost.Delete
End If
With Application
.ScreenUpdating = True
.Calculation = xlCalculationAutomatic
.EnableEvents = True
End With
End Sub
我的代码 - 500,000行数据。
在14,23秒内处理了500.000行和10列的数据表,发现561行相等且已被删除。
原始代码 - 500,000行数据。
不幸的是,我的系统无法处理此任务,Excel停止工作。
<小时/>
我的代码 - 250,000行数据。
250.000行和10列的数据表已在4,72秒内处理完毕,已发现313行相等且已被删除。
原始代码 - 250,000行数据。
250.000行和10列的数据表已在14,07秒内处理完毕,已发现313行相等且已被删除。
答案 2 :(得分:-1)
也许你可以进行2次调整,虽然它们的性能影响很小:
' prepare references to worksheets
Dim WorksheetPRE As Worksheet
Dim WorksheetPOST As Worksheet
Set WorksheetPRE = ActiveWorkbook.Worksheets("PRE")
Set WorksheetPOST = ActiveWorkbook.Worksheets("POST")
然后,在您的代码中,将ActiveWorkbook.Worksheets("PRE")
替换为WorksheetPRE
等。
我认为当你留在Excel中时没有其他重要的优化。请记住,Microsoft Excel主要是spreadsheet计算器,而不是数据表处理工具。
如果我真的需要加快比较,那么我会采用以下方法之一:
将Excel工作表链接到Microsoft Access作为表并在Access中执行比较(最简单)
如上所述,但不是链接表格,而是 import
如上所述,但使用Microsoft SQL Server(Express版本免费)