在工作簿保持冻结的情况下使用excel比较两列

时间:2014-12-17 14:35:13

标签: excel excel-vba vba

我试图在两列之间进行比较。列A有大约85,000个列表,列B大约有270,000个列表。我尝试使用vlookup和其他功能,如countif。但是,每当我尝试这些功能时,我的excel工作簿可能需要大约45分钟到一个小时甚至完成一项基本任务。请问,为什么我的Excel工作簿会一直冻结和放慢?提前谢谢。

1 个答案:

答案 0 :(得分:0)

下面的代码是一种突出显示重复数据的方法,如果需要,可以将其展开以使其更复杂。我还包括了我用来为这类工作排序数组的QuickSortArray过程,sort proc不是我的工作,并且信用在评论中。代码有很好的文档,所以希望它有所帮助

'---------------------------------------------------------------------------------------
' Procedure : DuplicateCheck
' Author    : Mark Moore
' Date      : 17/12/2014
' Purpose   : This procedure is an simple example of duplicate checking multiple columns of data
'             What is checked can simply be 2 columns of data, or can also be as complex as the developer
'             wants to code it i.e.  You could use this same method to duplicate compound values, simply by
'             populating the array with concatenated data.  Likewise the data that is reported can easily be
'             made richer by adding data you may wish to report on.  eg,  Sheet name could be added for each value
'             if you wanted to compare data across sheets.
'---------------------------------------------------------------------------------------
'
Sub DuplicateCheck()
Dim ColAArray, ColBArray, CombinedArray, DupesArray As Variant
Dim ArrayRange As String
Dim FirstRowInArray, LastRowInArray, loopcounter, DataRowNumber, DupesCounter As Long

   'define the range of list A by going from A1 to last populated cell in col A
   ArrayRange = "A1" & ":A" & Sheets("Sheet1").Range("A" & Rows.Count).End(xlUp).Row
   'Assign values to array
   ColAArray = Worksheets("Sheet1").Range(ArrayRange).Value ' read all the values at once from the Excel grid, put into an array

   'define the range of list B by going from B1 to last populated cell in col B
   ArrayRange = "B1" & ":B" & Sheets("Sheet1").Range("B" & Rows.Count).End(xlUp).Row
   'Assign values to array
   ColBArray = Worksheets("Sheet1").Range(ArrayRange).Value ' read all the values at once from the Excel grid, put into an array

   'We are going  to combine the 2 arrays and add cell addresses, so redefine the combined array to size of both arrays together
   ReDim CombinedArray(LBound(ColAArray, 1) To UBound(ColAArray) + UBound(ColBArray, 1), 1 To 2)
   'In theory every value could be duplicated, so size the duplicate array to combined size also
   ReDim DupesArray(LBound(CombinedArray, 1) To UBound(CombinedArray, 1), 1 To 2)

   'This is the row number of the data, in our case its 1 because our data range started at row 1 - change if you change the data range
   DataRowNumber = 1
   'First populate the ColA array into the combined array
   For loopcounter = LBound(ColAArray, 1) To UBound(ColAArray, 1)
       CombinedArray(loopcounter, 1) = ColAArray(loopcounter, 1) 'Actual value from Col A
       CombinedArray(loopcounter, 2) = "A" & DataRowNumber 'Cell address of that value
       DataRowNumber = DataRowNumber + 1 ' Increment the row number
   Next loopcounter

   'reset the data row back to 1
   DataRowNumber = 1
   'Now add the colb array to the combined array, starting from the element after the end of the first array
   For loopcounter = UBound(ColAArray, 1) + 1 To UBound(CombinedArray, 1)
       CombinedArray(loopcounter, 1) = ColBArray(DataRowNumber, 1) 'Actual value from Col A
       CombinedArray(loopcounter, 2) = "B" & DataRowNumber 'Cell address of that value
       DataRowNumber = DataRowNumber + 1 ' Increment the row number
   Next loopcounter

   Call QuickSortArray(CombinedArray, , , 1) 'Sort the array by cell values, so any dupes will be next to each other

   'Using variables for these values just for readability
   FirstRowInArray = LBound(CombinedArray, 1)
   LastRowInArray = UBound(CombinedArray, 1)

   'Initialise the duplicate counter
   DupesCounter = 0

   'The array is sorted, so loop through the array looking for duplicate values next to each other
   For loopcounter = LBound(CombinedArray, 1) To UBound(CombinedArray, 1)
       Select Case loopcounter
           Case LastRowInArray 'Deal with the last row - We cant check the next row as would be out of bound
               If CombinedArray(loopcounter, 1) = CombinedArray(loopcounter - 1, 1) Then
                   DupesCounter = DupesCounter + 1
                   'Add an entry to the error array
                   DupesArray(DupesCounter, 1) = CombinedArray(loopcounter, 1) 'Duplicated value
                   DupesArray(DupesCounter, 2) = CombinedArray(loopcounter, 2) 'Cell address
                   Exit For 'last element so exit
               End If
           'Deal with first - we cant check the preceeding rows
           Case FirstRowInArray
               If CombinedArray(loopcounter, 1) = CombinedArray(loopcounter + 1, 1) Then
                   DupesCounter = DupesCounter + 1
                   'Add an entry to the error array
                   DupesArray(DupesCounter, 1) = CombinedArray(loopcounter, 1) 'Duplicated value
                   DupesArray(DupesCounter, 2) = CombinedArray(loopcounter, 2) 'Cell address
               End If
           Case Else 'Deal with all other rows - we need to check if the preceeding or the following row is a duplicate
               If CombinedArray(loopcounter, 1) = CombinedArray(loopcounter - 1, 1) Or _
               CombinedArray(loopcounter, 1) = CombinedArray(loopcounter + 1, 1) Then
                   DupesCounter = DupesCounter + 1
                   'Add an entry to the error array
                   DupesArray(DupesCounter, 1) = CombinedArray(loopcounter, 1) 'Duplicated value
                   DupesArray(DupesCounter, 2) = CombinedArray(loopcounter, 2) 'Cell address
               End If
       End Select
   Next loopcounter

   'Now process the duplicates.  You could do anythin here with this data, report to text file etc
   'I have just chosen to colour the cells
   If DupesCounter > 0 Then
      For loopcounter = 1 To DupesCounter
        ActiveSheet.Range(DupesArray(loopcounter, 2)).Interior.Color = RGB(255, 255, 0)
      Next loopcounter
      MsgBox DupesCounter & " duplicates were found and highlighted when comparing values between column A and B", vbInformation, "Results of comparing Column A values with Column B"
   Else
      MsgBox "No Duplicated values between column A and B", vbInformation, "Results of comparing Column A values with Column B"
   End If
End Sub

'---------------------------------------------------------------------------------------
' Procedure : QuickSortArray
' Purpose   : This procedure is used to sort multi dimensional arrays - It is primarily used to sort arrays
'             from sheets where the original sheet sort order wants to be left "as is", so the data is read into
'             an array and then sorted.  This code was taken from the internet, with comments/credits as shown below
'             source url:  http://stackoverflow.com/questions/152319/vba-array-sort-function
'               Sort a 2-Dimensional array
'               SampleUsage: sort arrData by the contents of column 3 ' ' QuickSortArray arrData, , , 3
'               Posted by Jim Rech 10/20/98 Excel.Programming
'               Modifications, Nigel Heffernan:
'                   Escape failed comparison with empty variant
'                   Defensive coding: check inputs
' Version   : 06/01/2014 : Mark Moore - Initial Version
'---------------------------------------------------------------------------------------
Public Sub QuickSortArray(ByRef SortArray As Variant, _
    Optional lngMin As Long = -1, _
    Optional lngMax As Long = -1, _
    Optional lngColumn As Long = 0)

Dim i As Long
Dim j As Long
Dim varMid As Variant
Dim arrRowTemp As Variant
Dim lngColTemp As Long

On Error Resume Next

    If IsEmpty(SortArray) Then
        Exit Sub
    End If


    If InStr(TypeName(SortArray), "()") < 1 Then 'IsArray() is somewhat broken: Look for brackets in the type name
        Exit Sub
    End If


    If lngMin = -1 Then
        lngMin = LBound(SortArray, 1)
    End If

    If lngMax = -1 Then
        lngMax = UBound(SortArray, 1)
    End If


    If lngMin >= lngMax Then ' no sorting required
        Exit Sub
    End If

    i = lngMin
    j = lngMax

    varMid = Empty
    varMid = SortArray((lngMin + lngMax) \ 2, lngColumn)


    ' We send 'Empty' and invalid data items to the end of the list:
    If IsObject(varMid) Then ' note that we don't check isObject(SortArray(n)) - varMid might pick up a valid default member or property
        i = lngMax
        j = lngMin
    ElseIf IsEmpty(varMid) Then
        i = lngMax
        j = lngMin
    ElseIf IsNull(varMid) Then
        i = lngMax
        j = lngMin
    ElseIf varMid = "" Then
        i = lngMax
        j = lngMin
    ElseIf VarType(varMid) = vbError Then
        i = lngMax
        j = lngMin
    ElseIf VarType(varMid) > 17 Then
        i = lngMax
        j = lngMin
    End If

    While i <= j
        While SortArray(i, lngColumn) < varMid And i < lngMax
            i = i + 1
        Wend

        While varMid < SortArray(j, lngColumn) And j > lngMin
            j = j - 1
        Wend

        If i <= j Then
             ' Swap the rows
             ReDim arrRowTemp(LBound(SortArray, 2) To UBound(SortArray, 2))
             For lngColTemp = LBound(SortArray, 2) To UBound(SortArray, 2)
                 arrRowTemp(lngColTemp) = SortArray(i, lngColTemp)
                 SortArray(i, lngColTemp) = SortArray(j, lngColTemp)
                 SortArray(j, lngColTemp) = arrRowTemp(lngColTemp)
             Next lngColTemp
             Erase arrRowTemp
             i = i + 1
             j = j - 1
        End If
    Wend

    If (lngMin < j) Then Call QuickSortArray(SortArray, lngMin, j, lngColumn)
    If (i < lngMax) Then Call QuickSortArray(SortArray, i, lngMax, lngColumn)
End Sub