我在两个单元格中有不同文本的文本。我试图识别两个单元格之间的差异(文本之间的差异:添加或缺失的文本)
A1
我有一段文字。B1
包含类似的段落,但存在细微差别。我试图找出这些字符串之间的差异,请帮助我使用VBA识别 两个 单元格中的颜色差异
答案 0 :(得分:6)
我有一个解决您问题的方法,并上传了一个包含示例字符串对的工作簿。这是workbook。
我的代码基于Needleman–Wunsch algorithm,它最初是在1970年开发的,至今仍用于科学技术中的DNA序列比对。但是我修改了算法并添加了额外的后处理来处理样本数据字符串对。
以下是如何处理该过程。输入两个字符串以在A1和A2中进行比较。
按Alt-F8并运行宏AlignStrings
。
结果将显示在单元格A5和A6中。
请注意,从单元格A21开始,可以在工作表的下方找到其他示例字符串对。
以下是工作簿中的代码,它完成字符串对齐并突出显示差异:
Public Sub AlignStrings()
Dim a() As Byte, b() As Byte, a_$, b_$, i&, j&, d&, u&, l&, x&, y&, f&()
Const GAP = -1
Const PAD = "_"
a = [a1].Text: b = [a2].Text
[a3:a6].Clear
[a1:a6].Font.Name = "Courier New"
ReDim f(0 To UBound(b) \ 2 + 1, 0 To UBound(a) \ 2 + 1)
For i = 1 To UBound(f, 1)
For j = 1 To UBound(f, 2)
x = j - 1: y = i - 1
If a(x * 2) = b(y * 2) Then
d = 1 + f(y, x)
u = 0 + f(y, j)
l = 0 + f(i, x)
Else
d = -1 + f(y, x)
u = GAP + f(y, j)
l = GAP + f(i, x)
End If
f(i, j) = Max(d, u, l)
Next
Next
i = UBound(f, 1): j = UBound(f, 2)
On Error Resume Next
Do
x = j - 1: y = i - 1
d = f(y, x)
u = f(y, j)
l = f(i, x)
Select Case True
Case Err
If y < 0 Then GoTo left Else GoTo up
Case d >= u And d >= l Or Mid$(a, j, 1) = Mid$(b, i, 1)
diag:
a_ = Mid$(a, j, 1) & a_
b_ = Mid$(b, i, 1) & b_
i = i - 1: j = j - 1
Case u > l
up:
a_ = PAD & a_
b_ = Mid$(b, i, 1) & b_
i = i - 1
Case l > u
left:
a_ = Mid$(a, j, 1) & a_
b_ = PAD & b_
j = j - 1
End Select
Loop Until i < 1 And j < 1
DecorateStrings a_, b_, [a5], [a6], PAD
End Sub
Private Function Max(a&, b&, c&) As Long
Max = a
If b > a Then Max = b
If c > b Then Max = c
End Function
Private Sub DecorateStrings(a$, b$, rOutA As Range, rOutB As Range, PAD$)
Dim i&, j&
FloatArtifacts a, b, PAD
FloatArtifacts b, a, PAD
rOutA = a
rOutB = b
For i = 1 To Len(a)
If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
If Mid$(a, i, 1) <> PAD Then
rOutA.Characters(i, 1).Font.Color = vbRed
End If
End If
Next
For i = 1 To Len(b)
If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
If Mid$(b, i, 1) <> PAD Then
rOutB.Characters(i, 1).Font.Color = vbRed
End If
End If
Next
End Sub
Private Sub FloatArtifacts(s1$, s2$, PAD$)
Dim c&, k&, i&, p&
For i = 1 To Len(s1)
c = InStr(i, s1, PAD)
If c Then
k = 0
Do
k = k + 1
If Mid$(s1, c + k, 1) <> PAD Then
If Mid$(s2, c, 1) = Mid$(s1, c + k, 1) Then
p = InStr(c + k, s1, PAD)
If p < (c + k + 6) And p > 0 Then
Mid$(s1, c, 1) = Mid$(s1, c + k, 1)
Mid$(s1, c + k, 1) = PAD
i = c
Exit Do
Else
i = c + k
Exit Do
End If
Else
i = c + k
Exit Do
End If
End If
If c + k > Len(s1) Then Exit Do
Loop
Else
Exit For
End If
Next
End Sub