改善FOR循环的性能

时间:2016-01-02 07:04:35

标签: performance excel-vba vba excel

我正在比较工作簿中的工作表。该工作簿有两个名为PRE和POST的工作表,每个工作表中有19个相同的列。行数每天都不同,但在特定日期的两张纸上相同。宏将PRE工作表中的每一行与POST工作表中的相应行进行比较,如果它们相同,则删除两个工作表中的行。

我有通常建议的提高性能的方法,如屏幕更新设置为FALSE等。

我想优化两个FOR NEXT循环。

Dim RESULT As String

iPRE = ActiveWorkbook.Worksheets("PRE").Range("A1", Worksheets("PRE").Range("A1").End(xlDown)).Rows.Count
'MsgBox iPRE
iPOST = ActiveWorkbook.Worksheets("POST").Range("A1", Worksheets("POST").Range("A1").End(xlDown)).Rows.Count
'MsgBox iPOST

If iPRE <> iPOST Then
    MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
    Exit Sub

Else
    iRows = iPRE
End If

 'Optimize Performance

    Application.ScreenUpdating = False

    EventState = Application.EnableEvents
    Application.EnableEvents = False

    CalcState = Application.Calculation
    Application.Calculation = xlCalculationManual

    PageBreakState = ActiveSheet.DisplayPageBreaks
    ActiveSheet.DisplayPageBreaks = False

    For iCntr = iRows To 2 Step -1
        For y = 1 To 20
            If Worksheets("PRE").Cells(iCntr, y) <> Worksheets("POST").Cells(iCntr, y) Then
                RESULT = "DeleteN"
                Exit For
            Else
                RESULT = "DeleteY"
            End If
        Next y

        If RESULT = "DeleteY" Then
            Worksheets("PRE").Rows(iCntr).Delete
            Worksheets("POST").Rows(iCntr).Delete
        End If
    Next iCntr

    'Revert optmizing lines

    ActiveSheet.DisplayPageBreaks = PageBreakState
    Application.Calculation = CalcState
    Application.EnableEvents = EventState
    Application.ScreenUpdating = True

End Sub

3 个答案:

答案 0 :(得分:4)

对工作表单元格的任何引用都很慢。当你在循环中执行它时,这会大大增加。最好的速度增加将来自限制这些工作表参考。

一种好方法是复制Variant Arrays中的数据,并循环遍历这些数据,构建一个包含要保留的数据的新Variant数组。然后将新阵列一次性放在旧的阵列上。

使用200,000行,20列,50%文本,50%数字,删除170,000行的测试数据集:此代码在我的硬件上运行大约30秒

Sub Mine2()
    Dim T1 As Long, T2 As Long, T3 As Long

    Dim ResDelete As Boolean
    Dim iPRE As Long, iPOST As Long
    Dim EventState  As Boolean, CalcState As XlCalculation, PageBreakState As Boolean
    Dim iCntr As Long, y As Long, iRows As Long
    Dim rPre As Range, rPost As Range

    Dim PreDat As Variant, PostDat As Variant, PreDelDat As Variant, PostDelDat As Variant

    Dim n As Long
    Dim wsPre As Worksheet, wsPost As Worksheet

    Set wsPre = ActiveWorkbook.Worksheets("PRE")
    With wsPre
        Set rPre = .Range(.Cells(1, .Columns.Count).End(xlToLeft), .Cells(.Rows.Count, 1).End(xlUp))
        PreDat = rPre.Value
        iPRE = UBound(PreDat, 1)
        'MsgBox iPRE
    End With

    Set wsPost = ActiveWorkbook.Worksheets("POST")
    With wsPost
        Set rPost = .Range(.Cells(1, .Columns.Count).End(xlToLeft), .Cells(.Rows.Count, 1).End(xlUp))
        PostDat = rPost.Value
        iPOST = UBound(PostDat, 1)
        'MsgBox iPOST
    End With

    If iPRE <> iPOST Then
        MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
        Exit Sub
    End If
    iRows = iPRE


    ReDim PreDelDat(1 To UBound(PreDat, 1), 1 To UBound(PreDat, 2))
    ReDim PostDelDat(1 To UBound(PostDat, 1), 1 To UBound(PostDat, 2))
    n = 1
    On Error GoTo EH:
 'Optimize Performance

    Application.ScreenUpdating = False
    EventState = Application.EnableEvents
    Application.EnableEvents = False

    CalcState = Application.Calculation
    Application.Calculation = xlCalculationManual

    PageBreakState = ActiveSheet.DisplayPageBreaks
    ActiveSheet.DisplayPageBreaks = False


    T1 = GetTickCount
    For y = 1 To UBound(PreDat, 2)
        PreDelDat(1, y) = PreDat(1, y)
        PostDelDat(1, y) = PostDat(1, y)
    Next

    n = 2
    For iCntr = 2 To UBound(PreDat, 1)
        ResDelete = True
        For y = 1 To UBound(PreDat, 2)
            If PreDat(iCntr, y) <> PostDat(iCntr, y) Then
                ResDelete = False
                Exit For
            End If
        Next y

        If Not ResDelete Then
            For y = 1 To UBound(PreDat, 2)
                PreDelDat(n, y) = PreDat(iCntr, y)
                PostDelDat(n, y) = PostDat(iCntr, y)
            Next
            n = n + 1
        End If
    Next iCntr
    T2 = GetTickCount
    Debug.Print "Compare Done in:", T2 - T1
    Debug.Print "Rows to delete:", n - 1

    rPre = PreDelDat
    rPost = PostDelDat

    T3 = GetTickCount
    Debug.Print "Delete Done In:", T3 - T1
CleanUp:
    'Revert optmizing lines
    On Error Resume Next
    ActiveSheet.DisplayPageBreaks = PageBreakState
    Application.Calculation = CalcState
    Application.EnableEvents = EventState
    Application.ScreenUpdating = True
Exit Sub
EH:
    ' Handle Errors here
    Debug.Assert False
    Resume
    Err.Clear
    Resume CleanUp
End Sub

原件:

一种好方法是复制Variant Arrays中的数据,然后循环遍历这些数据,构建对要删除的单元格的引用。然后一次性删除。

其他一般提示:

  • 声明所有变量
  • 使用更合适的数据类型(Long,Boolean)
  • 使用End(xlUp)以避免意外空白失败(除非您希望停在第一个空白处)

重构代码:

Sub Demo()
    Dim ResDelete As Boolean
    Dim iPRE As Long, iPOST As Long
    Dim EventState  As Boolean, CalcState As XlCalculation, PageBreakState As Boolean
    Dim iCntr As Long, y As Long, iRows As Long
    Dim rPreDelete As Range, rPostDelete As Range

    Dim PreDat As Variant, PostDat As Variant

    With ActiveWorkbook.Worksheets("PRE")
        PreDat = .Range(.Cells(1, 20), .Cells(.Rows.Count, 1).End(xlUp)).Value
        iPRE = UBound(PreDat, 1)
        'MsgBox iPRE
    End With

    With ActiveWorkbook.Worksheets("POST")
        PostDat = .Range(.Cells(1, 20), .Cells(.Rows.Count, 1).End(xlUp)).Value
        iPOST = UBound(PostDat, 1)
        'MsgBox iPOST
    End With

    If iPRE <> iPOST Then
        MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
        Exit Sub
    End If
    iRows = iPRE

    On Error GoTo EH:
 'Optimize Performance

    Application.ScreenUpdating = False
    EventState = Application.EnableEvents
    Application.EnableEvents = False

    CalcState = Application.Calculation
    Application.Calculation = xlCalculationManual

    PageBreakState = ActiveSheet.DisplayPageBreaks
    ActiveSheet.DisplayPageBreaks = False

    For iCntr = 2 To UBound(PreDat, 1)
        ResDelete = True
        For y = 1 To 20
            If PreDat(iCntr, y) <> PostDat(iCntr, y) Then
                ResDelete = False
                Exit For
            End If
        Next y

        If ResDelete Then
            If rPreDelete Is Nothing Then
                Set rPreDelete = Worksheets("PRE").Rows(iCntr)
                Set rPostDelete = Worksheets("POST").Rows(iCntr)
            Else
                Set rPreDelete = Application.Union(rPreDelete, Worksheets("PRE").Rows(iCntr))
                Set rPostDelete = Application.Union(rPostDelete, Worksheets("POST").Rows(iCntr))
            End If
        End If
    Next iCntr
    If Not rPreDelete Is Nothing Then
        rPreDelete.Delete
        rPostDelete.Delete
    End If

CleanUp:
    'Revert optmizing lines
    On Error Resume Next
    ActiveSheet.DisplayPageBreaks = PageBreakState
    Application.Calculation = CalcState
    Application.EnableEvents = EventState
    Application.ScreenUpdating = True
Exit Sub
EH:
    ' Handle Errors here

    Resume CleanUp
End Sub

答案 1 :(得分:2)

如果我可以把我的两分钱,这是我的建议。

我测试了原始代码(唯一的更改是For y = 1 to 10而不是For y = 1 to 20)和我的代码针对2张10列和(最初500,000)250,000行数据每。我使用10而不是20的原因在于我不知道列中的数据是什么,作为替代,我使用了1或2的随机值。

  • 对于10列,这意味着有2^10 = 1,024种可能性。
  • 对于20列,这意味着有2^20 = 1,048,576种可能性。

由于我想在每个表中至少有几行相等的可能性,我选择了10列方案。

为宏计时我设置了一个定时器宏,它调用宏来比较和删除数据。

为了能够比较结果,在启动Excel并使用完全相同的数据打开文件后,两个宏都直接执行。

我有

  • 避免了Active
  • 的所有实例
  • 最大限度地减少了Excel和VBA之间的数据读取和写入,这是通过收集二维数组中的工作表上的所有数据然后分析数组来实现的。
  • 收集要在范围内删除的行(每张1个)并删除要在循环外删除的所有行

守则

Sub CompareAndDelete()
    Dim WsPre As Worksheet, WsPost As Worksheet
    Dim Row As Long, Column As Long
    Dim ArrPre() As Variant, ArrPost() As Variant
    Dim DeleteRow As Boolean
    Dim DeletePre As Range, DeletePost As Range

    With Application
        .ScreenUpdating = False
        .Calculation = xlCalculationManual
        .EnableEvents = False
    End With

    With ThisWorkbook
        Set WsPre = .Worksheets("PRE")
        Set WsPost = .Worksheets("Post")
    End With

    ArrPre = WsPre.Range(WsPre.Cells(1, 1), WsPre.Cells(WsPre.Cells(WsPre.Rows.Count, 1).End(xlUp).Row, 20))
    ArrPost = WsPost.Range(WsPost.Cells(1, 1), WsPost.Cells(WsPost.Cells(WsPost.Rows.Count, 1).End(xlUp).Row, 20))

    If Not UBound(ArrPre, 1) = UBound(ArrPost, 1) Then
        MsgBox "Unequal number of rows in sheets PRE and POST. Exiting macro.", vbCritical, "Unequal sheets"
    Else

        For Row = 2 To UBound(ArrPre, 1)
            DeleteRow = True
            For Column = 1 To UBound(ArrPre, 2)
                If Not ArrPre(Row, Column) = ArrPost(Row, Column) Then
                    DeleteRow = False
                    Exit For
                End If
            Next Column

            If DeleteRow = True Then
                If DeletePre Is Nothing Then
                    Set DeletePre = WsPre.Rows(Row)
                    Set DeletePost = WsPost.Rows(Row)
                Else
                    Set DeletePre = Union(DeletePre, WsPre.Rows(Row))
                    Set DeletePost = Union(DeletePost, WsPost.Rows(Row))
                End If

            End If
        Next Row

        If Not DeletePre Is Nothing Then DeletePre.Delete
        If Not DeletePost Is Nothing Then DeletePost.Delete

    End If

    With Application
        .ScreenUpdating = True
        .Calculation = xlCalculationAutomatic
        .EnableEvents = True
    End With

End Sub

结果

  

我的代码 - 500,000行数据。

     

在14,23秒内处理了500.000行和10列的数据表,发现561行相等且已被删除。

     

原始代码 - 500,000行数据。

     

不幸的是,我的系统无法处理此任务,Excel停止工作。   

<小时/>

     

我的代码 - 250,000行数据。

     

250.000行和10列的数据表已在4,72秒内处理完毕,已发现313行相等且已被删除。

     

原始代码 - 250,000行数据。

     

250.000行和10列的数据表已在14,07秒内处理完毕,已发现313行相等且已被删除。

答案 2 :(得分:-1)

也许你可以进行2次调整,虽然它们的性能影响很小:

 
' prepare references to worksheets
Dim WorksheetPRE As Worksheet
Dim WorksheetPOST As Worksheet
Set WorksheetPRE = ActiveWorkbook.Worksheets("PRE")
Set WorksheetPOST = ActiveWorkbook.Worksheets("POST")

然后,在您的代码中,将ActiveWorkbook.Worksheets("PRE")替换为WorksheetPRE等。

我认为当你留在Excel中时没有其他重要的优化。请记住,Microsoft Excel主要是spreadsheet计算器,而不是数据表处理工具。

如果我真的需要加快比较,那么我会采用以下方法之一:

  • 将Excel工作表链接到Microsoft Access作为表并在Access中执行比较(最简单)

  • 如上所述,但不是链接表格,而是 import

  • 如上所述,但使用Microsoft SQL Server(Express版本免费)