我编写了一个代码来比较两个工作表WS1和Ws2。代码从ws1中读取每一行的主键,并在ws2中找到具有相同主键的相应行,然后在两个工作表之间匹配所有其他列属性并进行相应的报告。
代码是:
Sub DataComparator(ws1 As Worksheet, ws2 As Worksheet)
Dim ws1row As Long, ws2row As Long, ws1col As Integer, ws2col As Integer
Dim maxrow As Long, maxcol As Integer, colval1 As String, colval2 As String
Dim difference As Long, reportrow As Long, reportcol As Long, flag As Boolean
Dim row As Long, col As Long, pki As Long, pk As String, counter As Long
Dim PctDone As Single, cell1 As String, cell2 As String, bfailed As Boolean
TestDataComparator.FrameProgress.Visible = True
TestDataComparator.LabelProgress.Visible = True
'UserForm1.Visible = True
'Application.ScreenUpdating = False
DoEvents
With ws1.UsedRange
ws1row = .Rows.Count
ws1col = .Columns.Count
End With
With ws2.UsedRange
ws2row = .Rows.Count
ws2col = .Columns.Count
End With
maxrow = ws1row
maxcol = ws1col
pk = UCase(TestDataComparator.TextBox1.Value)
For col = 1 To maxcol
If pk = UCase(ws1.Cells(1, col).Formula) Then
pki = col
End If
Next col
If maxrow < ws2row Then maxrow = ws2row
If maxcol < ws2col Then maxcol = ws2col
difference = 0
reportrow = 0
For row = 2 To maxrow
keyval = ws1.Cells(row, 1).Formula
flag = False
bfailed = False
'reportcol = 1
For col = 2 To maxcol
'If col = pki Then
'Exit For
'End If
counter = counter + 1
cell1 = ""
cell2 = ""
cell1 = ws1.Cells(row, col).Formula
On Error Resume Next
'Set Rng = Range("A2:" & Cells(ws2row, "A").Address)
cell2 = Application.WorksheetFunction.VLookup(keyval, ws2.UsedRange, col, False)
If Err.Number <> 0 Then bfailed = True
On Error GoTo 0
If bfailed = True Then
Exit For
End If
If cell1 <> cell2 Then
flag = True
'difference = difference + 1
diffcolname = ws1.Cells(1, col)
ws1.Cells(row, col).Interior.Color = RGB(255, 255, 0)
ws1.Cells(1, col).Interior.Color = RGB(255, 0, 0)
ws1.Cells(row, col).Font.Bold = True
ws1.Cells(1, pki).Interior.Color = RGB(0, 255, 0)
ws1.Cells(row, pki).Interior.Color = RGB(255, 255, 0)
ws1.Cells(row, pki).Font.Color = RGB(255, 0, 0)
ws1.Cells(row, pki).Font.Bold = True
End If
Next col
If flag = True Then
reportrow = reportrow + 1
End If
PctDone = counter / (maxrow * maxcol)
TestDataComparator.FrameProgress.Caption = "Progress..." & Format(PctDone, "0%")
TestDataComparator.LabelProgress.Width = PctDone * (TestDataComparator.FrameProgress.Width - 10)
DoEvents
Next row
TestDataComparator.Totalcount.Value = row - 2
TestDataComparator.mismatchCount.Value = reportrow
TestDataComparator.mismatchCount.Font = Bold
difference = 0
For col = 1 To maxcol
If ws1.Cells(1, col).Interior.Color = RGB(255, 0, 0) Then
difference = difference + 1
TestDataComparator.AttributeNameList.AddItem (ws1.Cells(1, col))
End If
Next col
TestDataComparator.FrameProgress.Visible = False
TestDataComparator.LabelProgress.Visible = False
'TestDataComparator.PleaseWait.Visible = False
MsgBox difference & " columns contain different data! ", vbInformation, "Comparing Two Worksheets"
Application.ScreenUpdating = True
End Sub
我希望vlookup函数仅在WS2的整个列中搜索匹配,该列具有主键(index pki)而不是ws2.UsedRange。请提供建议。有没有比vlookup更好的选项? ws2.UsedRange的使用使得在大型数据集中搜索变得困难,这就是我想减少搜索空间的原因。我的数据集在excel中有超过40K行和155列。如果您认为不合适,还建议我计算进度条进度的公式。
来自OP评论的示例数据:
Name Height Weight
Jane 5'6'' 78
Mike 5'4'' 89
Monica 5'2'' 56
答案 0 :(得分:2)
我认为使用词典(在其他语言中也称为Hashtable)可以使它更快。您需要引用Microsoft Scripting Runtime库。
在开始逐行浏览ws1之前,您需要在一个循环中将ws2键值及其行号读入Dictionary中。然后在循环中,只需在字典中查找值,即可在ws2上获取行号。像这样:
Dim ws2keys As Dictionary
Set ws2keys = New Dictionary
' assuming you have a header row
For row = 2 To ws2.UsedRange.Rows.Count
keyValue = ws1.Cells(row, 1).Value
If keyValue <> "" Then ws2keys.Add(keyValue, row)
Next
' your dictionary is ready
然后在循环中,而不是在ws1上逐行使用VLookup:
ws2RowIndex = ws2keys.Item(ws1KeyValueYouAreLookingFor)
(代码可能不完美,我在这台机器上没有任何与微软相关的内容来检查语法,抱歉。)
答案 1 :(得分:0)
我已将每列的VLOOKUP
缩减为单个MATCH
以验证其是否存在,并将MATCH
设置为匹配发生的WS2
行。其他一切都是通过直接寻址完成的。
Sub DataComparator(ws1 As Worksheet, ws2 As Worksheet)
Dim ws1row As Long, ws2row As Long, ws1col As Long, ws2col As Long
Dim maxrow As Long, maxcol As Long, colval1 As String, colval2 As String
Dim difference As Long, reportrow As Long, reportcol As Long, flag As Boolean
Dim rw As Long, cl As Long, pki As Long, pk As String, counter As Long
Dim cell1 As String, cell2 As String, bfailed As Boolean
Dim iPCT As Long, ws2rw As Long, rWS1cr As Range, rWS2cr As Range, keyval As Variant, app As Application
Set app = Application
'UserForm1.Visible = True
app.ScreenUpdating = False
'DoEvents
With ws1.Cells(1, 1).CurrentRegion
Set rWS1cr = .Cells
ws1row = .Rows.Count
ws1col = .Columns.Count
End With
With ws2.Cells(1, 1).CurrentRegion
Set rWS2cr = .Cells
ws2row = .Rows.Count
ws2col = .Columns.Count
End With
maxrow = ws1row
maxcol = ws1col
'pk = UCase(TestDataComparator.TextBox1.Value)
For cl = 1 To maxcol
If pk = UCase(rWS1cr.Cells(1, cl).Value) Then
pki = cl
Exit For
End If
Next cl
If maxrow < ws2row Then maxrow = ws2row
If maxcol < ws2col Then maxcol = ws2col
difference = 0
reportrow = 0
With rWS1cr
For rw = 2 To maxrow
keyval = ws1.Cells(rw, 1).Value
If Not IsError(app.Match(keyval, rWS2cr.Columns(1), 0)) Then
ws2rw = app.Match(keyval, rWS2cr.Columns(1), 0)
flag = False
For cl = 2 To maxcol
counter = counter + 1
cell1 = vbNullString
cell2 = vbNullString
cell1 = .Cells(rw, cl).Value
cell2 = rWS2cr.Cells(ws2rw, cl).Value
If cell1 <> cell2 Then
flag = True
'diffcolname = .Cells(1, cl)
.Cells(rw, cl).Interior.Color = RGB(255, 255, 0)
.Cells(1, cl).Interior.Color = RGB(255, 0, 0)
.Cells(rw, cl).Font.Bold = True
.Cells(1, pki).Interior.Color = RGB(0, 255, 0)
.Cells(rw, pki).Interior.Color = RGB(255, 255, 0)
.Cells(rw, pki).Font.Color = RGB(255, 0, 0)
.Cells(rw, pki).Font.Bold = True
End If
Next cl
reportrow = reportrow - CLng(flag)
If iPCT <> CLng((rw / maxrow) * 100) Then
iPCT = CLng((rw / maxrow) * 100)
app.StatusBar = "Progress - " & Format(iPCT, "0\%")
End If
End If
Next rw
For cl = 1 To maxcol
If .Cells(1, cl).Interior.Color = RGB(255, 0, 0) Then
difference = difference + 1
'TestDataComparator.AttributeNameList.AddItem (ws1.Cells(1, col))
End If
Next cl
MsgBox difference & " columns contain different data! ", vbInformation, "Comparing Two Worksheets"
End With
difference = 0
app.ScreenUpdating = True
app.StatusBar = vbNullString
Set app = Nothing
End Sub
我更喜欢.CurrentRegion
到.UsedRange
,因为我发现它更可靠。这段代码没有经过测试,但它确实编译了,我不得不注释掉一些外部引用来实现这一点。