从Excel工作表中的单元格中删除冗余数据

时间:2018-06-24 10:16:00

标签: excel vba excel-vba duplicates

我在2个不同列的两个单元格中都有数据。

例如:
ColA:A1单元格的逗号分隔值为1,2,3
ColB:B1单元格的逗号分隔值为ABC,DEF,ABC

想要实现逻辑,以便使其显示为

ColA    ColB
1,3     ABC
2       DEF

Ex2 .:
ColA:A1单元格的逗号分隔值为1,2,3
ColB:B1细胞的逗号分隔值为ABC,ABC,ABC

ColA      ColB
1,2,3     ABC

到目前为止,我已经为B列实现了逻辑,但是在执行此操作时无法更新col A数据。

Sub RemoveDupData()
    Dim sString As String
    Dim MyAr As Variant
    Dim Col As New Collection
    Dim itm

    sString = "ABC,DEF,ABC,CDR"

    MyAr = Split(sString, ",")

    For i = LBound(MyAr) To UBound(MyAr)
        On Error Resume Next
        '-- A collection cannot have the same key twice so here, we are creating a key using the item that we are adding.
        '-- This will ensure that we will not get duplicates.       
        Col.Add Trim(MyAr(i)), CStr(Trim(MyAr(i)))
        On Error GoTo 0
    Next i

    sString = ""

    For Each itm In Col
        sString = sString & "," & itm
    Next

    sString = Mid(sString, 2)

End Sub

3 个答案:

答案 0 :(得分:2)

此方法比Jeeped的方法复杂,但可能更易于适应变化。

我进行了逐行处理,但是,只需更改密钥的生成方式,就可以对整个数据集colB进行重复数据删除(请参见代码中的注释)

我使用字典来确保密钥不重复,并且字典项将是相关colA值的集合。

Sub FixData()
    Dim wsSrc As Worksheet, wsRes As Worksheet, rRes  As Range
    Dim vSrc As Variant, vRes As Variant
    Dim vA As Variant, vB As Variant
    Dim I As Long, J As Long
    Dim dD As Object, Col As Collection
    Dim sKey As String

Set wsSrc = Worksheets("sheet1")

'Note that depending on how you set these parameters, you will be
'able to write the Results anyplace in the workbook,
'even overlying the original data
Set wsRes = Worksheets("sheet1")
    Set rRes = wsRes.Cells(1, 5)

With wsSrc
    vSrc = .Range(.Cells(1, 1), .Cells(.Rows.Count, 2).End(xlUp))
End With

'Use a dictionary to collect both the unique items in ColB (which will be the key)
'and a collection of the relevant objects in ColA
Set dD = CreateObject("scripting.dictionary")
For I = 1 To UBound(vSrc, 1)
    vA = Split(vSrc(I, 1), ",")
    vB = Split(vSrc(I, 2), ",")
        If UBound(vA) <> UBound(vB) Then
            MsgBox "different number of elements in each column"
        End If

        For J = 0 To UBound(vA)
            sKey = vB(J) & "|" & I

            'To remove dups from the entire data set
            ' change above line to:
            'sKey = vB(J)

            If Not dD.Exists(sKey) Then
                Set Col = New Collection
                Col.Add vA(J)
                dD.Add Key:=sKey, Item:=Col
            Else
                dD(sKey).Add vA(J)
            End If
        Next J
Next I

'Create Results array
ReDim vRes(1 To dD.Count, 1 To 2)
I = 0
For Each vB In dD.Keys
    I = I + 1
    vRes(I, 2) = Split(vB, "|")(0)

    For J = 1 To dD(vB).Count
        vRes(I, 1) = vRes(I, 1) & "," & dD(vB)(J)
    Next J
        vRes(I, 1) = Mid(vRes(I, 1), 2) 'remove leading comma
Next vB

'write results to worksheet
Set rRes = rRes.Resize(UBound(vRes, 1), 2)
With rRes
    .EntireColumn.Clear
    .Value = vRes
    .HorizontalAlignment = xlLeft
End With
End Sub

源数据

enter image description here

逐行处理

enter image description here

整个数据集处理

enter image description here

答案 1 :(得分:1)

这似乎满足了您发布的两个示例。

Option Explicit

Sub RemoveDupData()
    Dim i As Long, valA As Variant, valB As Variant, r As Variant
    With Worksheets("sheet7")
        valA = Split(.Cells(1, "A"), Chr(44))
        valB = Split(.Cells(1, "B"), Chr(44))

        For i = LBound(valB) To UBound(valB)
            r = Application.Match(valB(i), valB, 0)
            Select Case True
                Case r < i + 1
                    valB(i) = vbNullString
                Case r > 1
                    .Cells(.Rows.Count, "A").End(xlUp).Offset(1, 0).Resize(1, 2) = _
                        Array(valA(i), valB(i))
                    valA(i) = vbNullString
                    valB(i) = vbNullString
            End Select
        Next i

        valA = Replace(Application.Trim(Join(valA, Chr(32))), Chr(32), Chr(44))
        valB = Replace(Application.Trim(Join(valB, Chr(32))), Chr(32), Chr(44))

        .Cells(1, "A").Resize(1, 2) = Array(valA, valB)
    End With
End Sub

答案 2 :(得分:0)

您可以使用Dictionary对象

Option Explicit

Sub RemoveDupData()
    Dim AData As Variant, BData As Variant

    With Range("A1", cells(Rows.Count, 1).End(xlUp))
        AData = Application.Transpose(.Value)
        BData = Application.Transpose(.Offset(, 1).Value)
        .Resize(, 2).ClearContents
    End With

    Dim irow As Long
    For irow = 1 To UBound(AData)
        WriteNoDupes Split(AData(irow), ","), Split(BData(irow), ",")
    Next
    Range("A1:B1").Delete Shift:=xlUp
End Sub

Sub WriteNoDupes(ADatum As Variant, BDatum As Variant)
    Dim iItem As Long, key As Variant
    With CreateObject("scripting.dictionary")
        For iItem = 0 To UBound(ADatum)
            .Item(BDatum(iItem)) = .Item(BDatum(iItem)) & " " & ADatum(iItem)
        Next
        For Each key In .Keys
            cells(Rows.Count, 1).End(xlUp).Offset(1).Value = Replace(Trim(.Item(key)), " ", ",")
            cells(Rows.Count, 2).End(xlUp).Offset(1).Value = key
        Next
    End With
End Sub