按字符比较两个单元格中的(diff)字符串

时间:2015-09-03 18:27:13

标签: excel vba excel-vba

我在两个单元格中有不同文本的文本。我试图识别两个单元格之间的差异(文本之间的差异:添加或缺失的文本)

  1. A1我有一段文字。
  2. B1包含类似的段落,但存在细微差别。
  3. 我试图找出这些字符串之间的差异,请帮助我使用VBA识别 两个 单元格中的颜色差异

1 个答案:

答案 0 :(得分:6)

我有一个解决您问题的方法,并上传了一个包含示例字符串对的工作簿。这是workbook

我的代码基于Needleman–Wunsch algorithm,它最初是在1970年开发的,至今仍用于科学技术中的DNA序列比对。但是我修改了算法并添加了额外的后处理来处理样本数据字符串对。

以下是如何处理该过程。输入两个字符串以在A1和A2中进行比较。

按Alt-F8并运行宏AlignStrings

结果将显示在单元格A5和A6中。

请注意,从单元格A21开始,可以在工作表的下方找到其他示例字符串对。

以下是工作簿中的代码,它完成字符串对齐并突出显示差异:

Public Sub AlignStrings()
    Dim a() As Byte, b() As Byte, a_$, b_$, i&, j&, d&, u&, l&, x&, y&, f&()
    Const GAP = -1
    Const PAD = "_"

    a = [a1].Text: b = [a2].Text
    [a3:a6].Clear
    [a1:a6].Font.Name = "Courier New"

    ReDim f(0 To UBound(b) \ 2 + 1, 0 To UBound(a) \ 2 + 1)

    For i = 1 To UBound(f, 1)
        For j = 1 To UBound(f, 2)
            x = j - 1: y = i - 1
            If a(x * 2) = b(y * 2) Then
                d = 1 + f(y, x)
                u = 0 + f(y, j)
                l = 0 + f(i, x)
            Else
                d = -1 + f(y, x)
                u = GAP + f(y, j)
                l = GAP + f(i, x)
            End If
            f(i, j) = Max(d, u, l)
        Next
    Next

    i = UBound(f, 1): j = UBound(f, 2)
    On Error Resume Next
    Do
        x = j - 1: y = i - 1
        d = f(y, x)
        u = f(y, j)
        l = f(i, x)
        Select Case True
            Case Err
                If y < 0 Then GoTo left Else GoTo up
            Case d >= u And d >= l Or Mid$(a, j, 1) = Mid$(b, i, 1)
diag:
                a_ = Mid$(a, j, 1) & a_
                b_ = Mid$(b, i, 1) & b_
                i = i - 1: j = j - 1
            Case u > l
up:
                a_ = PAD & a_
                b_ = Mid$(b, i, 1) & b_
                i = i - 1
            Case l > u
left:
                a_ = Mid$(a, j, 1) & a_
                b_ = PAD & b_
                j = j - 1
        End Select
    Loop Until i < 1 And j < 1

    DecorateStrings a_, b_, [a5], [a6], PAD

End Sub


Private Function Max(a&, b&, c&) As Long
    Max = a
    If b > a Then Max = b
    If c > b Then Max = c
End Function


Private Sub DecorateStrings(a$, b$, rOutA As Range, rOutB As Range, PAD$)
    Dim i&, j&

    FloatArtifacts a, b, PAD
    FloatArtifacts b, a, PAD

    rOutA = a
    rOutB = b

    For i = 1 To Len(a)
        If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
            If Mid$(a, i, 1) <> PAD Then
                rOutA.Characters(i, 1).Font.Color = vbRed
            End If
        End If
    Next
    For i = 1 To Len(b)
        If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
            If Mid$(b, i, 1) <> PAD Then
                rOutB.Characters(i, 1).Font.Color = vbRed
            End If
        End If
    Next

End Sub


Private Sub FloatArtifacts(s1$, s2$, PAD$)
    Dim c&, k&, i&, p&
    For i = 1 To Len(s1)
        c = InStr(i, s1, PAD)
        If c Then
            k = 0
            Do
                k = k + 1
                If Mid$(s1, c + k, 1) <> PAD Then
                    If Mid$(s2, c, 1) = Mid$(s1, c + k, 1) Then
                        p = InStr(c + k, s1, PAD)
                        If p < (c + k + 6) And p > 0 Then
                            Mid$(s1, c, 1) = Mid$(s1, c + k, 1)
                            Mid$(s1, c + k, 1) = PAD
                            i = c
                            Exit Do
                        Else
                            i = c + k
                            Exit Do
                        End If
                    Else
                        i = c + k
                        Exit Do
                    End If
                End If
                If c + k > Len(s1) Then Exit Do
            Loop
        Else
            Exit For
        End If
    Next
End Sub