因为一周我试图在我的主题上取得成功,但我找不到任何可接受的解决方案。我的意思是,......我有一个有效的解决方案,但比较只需要半天左右:-S
前提条件: 两个csv文件都已复制粘贴到本地工作簿中。他们在场,随时准备和他们一起玩。每个文件有~6000行和4列。 列A:文档名称/版本 B栏:subject1 C栏:subject2 D列:boolean-artefact 两个csv文件具有相同的结构。 A列包括文档名称及其最新版本。 每行包含:documentname / version,subj1,subj2,boolean
的组合CSV_old new的示例,包括column_E
中csv_new的注释/更改Document/Version Subj1 Subj2 BOOLEAN
DOC_1/Vers1 FUN GERMANY FALSE
DOC_2/Vers3 FUN GERMANY TRUE
DOC_2/Vers3 FUN UK TRUE <- to be deleted in CSV_new
DOC_2/Vers3 FUN FRANCE TRUE
DOC_3/Vers7 ACTION GERMANY FALSE <- Version Update in CSV_new
DOC_4/Vers4 MOVIE UK TRUE
DOC_6/Vers1 HELP SPAIN FALSE
DOC_7/Vers2 FUN GERMANY FALSE <- boolean: true in CSV_new
DOC_8/Vers5 FUN FRANCE TRUE <- Subj1: ACTION instead of FUN
CSV_new
Document/Version Subj1 Subj2 BOOLEAN
DOC_1/Vers1 FUN GERMANY FALSE
DOC_2/Vers3 FUN GERMANY TRUE
DOC_2/Vers3 FUN UK TRUE
DOC_2/Vers3 FUN FRANCE TRUE
DOC_3/Vers9 ACTION GERMANY FALSE <- Version Updated
DOC_4/Vers4 MOVIE UK TRUE
DOC_5/Vers5 DANGER UK FALSE <- new/added Row in CSV_new
DOC_6/Vers1 HELP SPAIN FALSE
DOC_7/Vers2 FUN GERMANY FALSE <- boolean updated to true
DOC_8/Vers5 ACTION FRANCE TRUE <- Subj1: ACTION instead of FUN
目的: 比较两个CSV文件(均来自数据库)。每个文件都是来自庞大数据库(摘录)的派生版本。我想检查更旧的csv文件(例如版本2.0,csv_old)(例如版本4.1,csv_new)。
这样我希望看到数据库的两个派生版本(数据提取)之间的差异。可以有新的插入/添加行以及删除的行。
到目前为止,我得到的代码正在运行,但需要花费太多时间。我粘贴了一种伪代码,让你想象我的方法(它只包含一步比较):
对于rowInOldCSV = 3到表格(“_ ws_oldCSV”)。范围(“A65536”)。结束(xlUp)。行
Set findSameDocumentNumberInColumnA = Sheets(givenActiveWS).Cells.Find(Sheets("_ws_oldCSV").Range("A" & rowInOldCSV & ":D" & rowInOldCSV).Value, LookIn:=xlValues)
Set findSameDocumentNumberInColumnA_withoutVers = Sheets(givenActiveWS).Cells.Find(Left(Sheets("_ws_oldCSV").Cells(rowInOldCSV, 1).Value, Len(Sheets("_ws_oldCSV").Cells(rowInOldCSV, 1).Value) - 5), LookIn:=xlValues)
If Not findSameDocumentNumberInColumnA Is Nothing Then
'document/version found!
firstAddress = findSameDocumentNumberInColumnA.Address
Do
'if subj1+subj2 are same
If (Sheets(givenActiveWS).Cells(findSameDocumentNumberInColumnA.Row, 2).Value = Sheets("_ws_oldCSV").Cells(rowInOldCSV, 2).Value) And _
(Sheets(givenActiveWS).Cells(findSameDocumentNumberInColumnA.Row, 3).Value = Sheets("_ws_oldCSV").Cells(rowInOldCSV, 3).Value) Then '....and boolean-value the same
'Sheets("_ws_oldCSV").Range("A" & rowInOldCSV & ":D" & rowInOldCSV).Copy 'takes even longer
'Sheets(givenActiveWS).Cells(findSameDocumentNumberInColumnA.Row, 6).PasteSpecial Paste:=xlPasteValues
Sheets(givenActiveWS).Cells(findSameDocumentNumberInColumnA.Row, 6).Value = Sheets("_ws_oldCSV").Cells(rowInOldCSV, 1).Value
Sheets(givenActiveWS).Cells(findSameDocumentNumberInColumnA.Row, 7).Value = Sheets("_ws_oldCSV").Cells(rowInOldCSV, 2).Value
Sheets(givenActiveWS).Cells(findSameDocumentNumberInColumnA.Row, 8).Value = Sheets("_ws_oldCSV").Cells(rowInOldCSV, 3).Value
Sheets(givenActiveWS).Cells(findSameDocumentNumberInColumnA.Row, 9).Value = Sheets("_ws_oldCSV").Cells(rowInOldCSV, 4).Value
'leave loop
Exit Do
End If
Set findSameDocumentNumberInColumnA = Sheets(givenActiveWS).Cells.FindNext(findSameDocumentNumberInColumnA)
Loop While Not findSameDocumentNumberInColumnA Is Nothing And findSameDocumentNumberInColumnA.Address <> firstAddress
Else
'document/version not found
If Not findSameDocumentNumberInColumnA_withoutVers Is Nothing Then
'document found, looks like new version
'mark it with yellow to show updated version
Else
'unkown document, means new introduced since csv_old
'copy it under last item in RowF
'
End If
End If
next rowInOldCSV
到目前为止我的方法。我看到了两个不同的: http://www.ms-office-forum.net/forum/showthread.php?t=279399 和 Excel VBA: Range to String Array in 1 step 两者似乎工作得很好,速度很快,但不幸的是我无法在我的场景中使用它。
我想,我必须将列中的值放入string-array中才能开始比较?我没有想法,也不知道如何将Column-Values处理成String-Arrays。对不起,......
你可以帮我吗?
比较结果:将内容写入CSV_new会很好。
Doc/Vers Subj1 Subj2 BOOLEAN Doc Subj1 Subj1 Boolean
DOC_1/Vers1 FUN GERMANY FALSE - - - -
DOC_2/Vers3 FUN GERMANY TRUE - - - -
DOC_2/Vers3 FUN UK TRUE Deleted - - -
DOC_2/Vers3 FUN FRANCE TRUE - - - -
DOC_3/Vers9 ACTION GERMANY FALSE Updated - - -
DOC_4/Vers4 MOVIE UK TRUE - - - -
DOC_5/Vers5 DANGER UK FALSE New - - -
DOC_6/Vers1 HELP SPAIN FALSE - - - -
DOC_7/Vers2 FUN GERMANY TRUE - - - X
DOC_8/Vers5 ACTION FRANCE TRUE - X - -
很多,非常感谢您的努力!!!!! :O)
答案 0 :(得分:0)
此代码将生成两组结果:一组用于Sheet1
(旧),另一组用于Sheet2
(新)
Sheet1
将显示记录 missing from Sheet2
Sheet2
将显示记录 missing from Sheet1
它使用嵌套词典(详见下文)
Option Explicit
Public Sub CompareCSVs() '1 = Old, 2 = New; UsedRange starts at A1
Const LC1 = 4 'D - LastCol in Old
Const LC2 = 4 'D - LastCol in New
Dim ur1 As Range, arr1 As Variant, dv1 As Object
Dim ur2 As Range, arr2 As Variant, dv2 As Object
Set ur1 = Sheet1.UsedRange 'Or: ThisWorkbook.Worksheets("csv_old").UsedRange
Set ur2 = Sheet2.UsedRange 'Or: ThisWorkbook.Worksheets("csv_new").UsedRange
arr1 = ur1
arr2 = ur2
Set dv1 = CreateObject("Scripting.Dictionary")
Set dv2 = CreateObject("Scripting.Dictionary")
Dim urRes1 As Range, urRes2 As Range, arrRes1 As Variant, arrRes2 As Variant
Set urRes1 = ur1.Offset(1, LC1).Resize(ur1.Rows.Count - 1, LC1 + 1) 'Exclude Headers
Set urRes2 = ur2.Offset(1, LC2).Resize(ur2.Rows.Count - 1, LC2 + 1) 'Exclude Headers
urRes1.ClearContents 'Clear results
urRes2.ClearContents
arrRes1 = urRes1
arrRes2 = urRes2
SetDictionaries dv1, arr1, LC1
SetDictionaries dv2, arr2, LC2: 'ShowAllItems dv1: ShowAllItems dv2
CompareData dv1, dv2, arrRes2
CompareData dv2, dv1, arrRes1
urRes1 = arrRes1
urRes2 = arrRes2
End Sub
Private Sub SetDictionaries(ByRef d As Object, ByRef arr As Variant, ByVal ubC As Long)
Dim r As Long, c As Long, k As String
For r = 2 To UBound(arr)
For c = 1 To ubC
k = k & arr(r, c) & "|"
d(Left(k, Len(k) - 1)) = 0
Next
k = vbNullString
Next
End Sub
Private Sub CompareData(ByRef d1 As Variant, ByRef d2 As Variant, ByRef res As Variant)
Dim r As Long, c As Long, itm As Variant, sp As Variant, k As Variant
r = 1
For Each itm In d2
sp = Split(itm, "|")
c = UBound(sp) + 1
If Not d1.Exists(itm) Then
If Len(res(r, 1)) = 0 Then
res(r, 1) = IIf(c = 1, "Missing: ", "Updated: ")
res(r, c + 1) = sp(c - 1)
Else
If res(r, 1) = "Updated: " Then res(r, c + 1) = sp(c - 1)
End If
End If
If c = 4 Then r = r + 1
Next
End Sub
Private Sub ShowAllItems(ByRef d As Object)
Dim x As Variant
For Each x In d
Debug.Print x 'Space$(5), String$(5, "-")
Next
Debug.Print
End Sub
字典,准备比较时
在
在
注意:您提供的样本数据与描述
中的不同CSV_old
Document/Version Subj1 Subj2 BOOLEAN
DOC_1/Vers1 FUN GERMANY FALSE <- Correct
DOC_2/Vers3 FUN GERMANY TRUE <- Correct
DOC_2/Vers3 FUN UK TRUE <- to be deleted in CSV_new <- Exists in new
DOC_2/Vers3 FUN FRANCE TRUE <- Correct
DOC_3/Vers7 ACTION GERMANY FALSE <- Version Update in CSV_new <- This not in new
DOC_4/Vers4 MOVIE UK TRUE <- Correct
DOC_6/Vers1 HELP SPAIN FALSE <- Correct
DOC_7/Vers2 FUN GERMANY FALSE <- boolean: true in CSV_new <- FALSE in new
DOC_8/Vers5 FUN FRANCE TRUE <- Subj1: ACTION instead of FUN <- Correct
CSV_new
Document/Version Subj1 Subj2 BOOLEAN
DOC_1/Vers1 FUN GERMANY FALSE <- Correct
DOC_2/Vers3 FUN GERMANY TRUE <- Correct
DOC_2/Vers3 FUN UK TRUE <- Exists in new
DOC_2/Vers3 FUN FRANCE TRUE <- Correct
DOC_3/Vers9 ACTION GERMANY FALSE <- Version Updated <- New record
DOC_4/Vers4 MOVIE UK TRUE <- Correct
DOC_5/Vers5 DANGER UK FALSE <- new/added Row in CSV_new <- Correct
DOC_6/Vers1 HELP SPAIN FALSE <- Correct
DOC_7/Vers2 FUN GERMANY FALSE <- boolean updated to true <- FALSE in new
DOC_8/Vers5 ACTION FRANCE TRUE <- Subj1: ACTION instead of FUN <- Correct