我有一个大数据集(约300.000行),我想根据两列中的值删除重复项。我试图在下面举例说明。我编写的代码现在可以检测到重复项,但是如果我随后运行RemoveDuplicates,则(显然)所有重复项都将被删除。我只想在以下数据集中删除瑞典的重复项。有什么方法可以不使用Autofilter命令等而开始的吗?如果我的路还很遥远,我也很高兴能提供任何输入。
[数据集1
This is my code:
Option Explicit
Public Sub Markdups()
'Timing
Dim dblStart As Double
dblStart = Timer
Dim wksIDs As Worksheet
Dim varIDs As Variant, varStatus As Variant, _
varID As Variant
Dim strID As String
Dim lngLastRow As Long, lngIdx As Long
Dim dicDistincts As Scripting.Dictionary, _
dicDuplicates As Scripting.Dictionary
Set dicDistincts = New Scripting.Dictionary
Set dicDuplicates = New Scripting.Dictionary
Set wksIDs = ThisWorkbook.Worksheets("uke 32 onward")
lngLastRow = LastOccupiedRowNum(wksIDs)
dicDistincts.CompareMode = vbTextCompare
dicDuplicates.CompareMode = vbTextCompare
varIDs = wksIDs.Range("D1:D" & lngLastRow) 'Row to search
varStatus = varIDs
For Each varID In varIDs
strID = Trim(CStr(varID))
If strID <> vbNullString Then
If Not dicDistincts.Exists(strID) Then
dicDistincts.Add Key:=strID, Item:=strID
ElseIf Not dicDuplicates.Exists(strID) Then
dicDuplicates.Add Key:=strID, Item:=strID
End If
End If
Next varID
lngIdx = 1
For Each varID In varIDs
If dicDuplicates.Exists(CStr(varID)) Then
varStatus(lngIdx, 1) = "Duplicate"
Else
varStatus(lngIdx, 1) = "Unique"
End If
lngIdx = lngIdx + 1
Next varID
wksIDs.Range("O1:O" & lngLastRow) = varStatus
MsgBox "Duplicates counted Took " & _
Round(Timer - dblStart, 2) & " seconds..."
End Sub
非常感谢您提供的任何帮助。
最佳