我正在使用正则表达式来查找Word文档中的所有模式匹配,然后我将对其进行操作。
我搜索的文件长约330页,包含复制/粘贴的电子邮件。我的问题是,当我使用InStr(startPos, objRange.Text, match.submatches(0))
来查找每个匹配的起始位置时,结果实际上会偏移一些量。对于处于原始状态的文档,该偏移量恰好为324个字符。
在预感中,我决定删除文档中的所有超链接,看看它会做什么。 RemoveHyperlinks子找到并删除了24个超链接,之后Instr()
返回值仅关闭了20个字符(因此减去幻数matchStart = matchStart - 1 - 20
给出了正确的起始位置)。显然我想避免所有魔法数字,但我无法弄清楚最后20个字符的来源。
我尝试取消链接所有字段,但删除超链接后没有取消链接。
关于为什么
的任何想法matchStart = InStr(startPos, objRange.Text, match.submatches(0))
matchEnd = matchStart + Len(match.submatches(0))
Set subRange0 = objDoc.Range(matchStart, matchEnd)
让我subRange0.Text
与match.submatches(0)
不同?或者可以找到其他隐藏字符(要删除)?
Sub FixHighlightedText()
Dim objDoc As Document
Dim objRange As Range, subRange0 As Range
Dim matchStart As Long, matchEnd As Long, startPos As Long
Dim regex As Object
Dim matches
Set objDoc = ActiveDocument
Set objRange = objDoc.Range(0, objDoc.Content.End)
startPos = 1
Set regex = CreateObject("VBScript.RegExp")
Call RemoveHyperlinks
With regex
.Pattern = "((\([a-zA-Z]*?[-]?Time:.*?\})[a-zA-Z0-9]{0,3})"
.Global = True
End With
If regex.test(objRange.Text) Then
Set matches = regex.Execute(objRange.Text)
Debug.Print "Document has " & matches.Count & " matches"
Debug.Print "Document range is " & objRange.Start & " to " & objRange.End
Debug.Print "FirstIndex = " & matches(0).FirstIndex
For Each match In matches
matchStart = InStr(startPos, objRange.Text, match.submatches(0))
startPos = matchStart + Len(match.submatches(0))
If matchStart > 0 Then
matchStart = matchStart - 1
matchEnd = matchStart + Len(match.submatches(0))
Set subRange0 = objDoc.Range(matchStart, matchEnd)
Debug.Print "Match starts at " & matchStart & " and ends at " & (matchStart + Len(match.submatches(1)))
Debug.Print " match0 text = " & match.submatches(0)
Debug.Print " subrange0 text = " & subRange0.Text
Else
Debug.Print "Match mysteriously not found in text"
End If
Next match
Else
Debug.Print "No regex matches"
End If
End Sub
Sub RemoveHyperlinks()
Dim link, cnt As Long, linkRange As Range, i As Long
cnt = 0
For i = ActiveDocument.Hyperlinks.Count To 1 Step -1
With ActiveDocument.Hyperlinks(i)
.TextToDisplay = .TextToDisplay & " (" & .Address & ")"
Set linkRange = .Range
End With
ActiveDocument.Hyperlinks(i).Delete
With linkRange.Font
.Underline = wdUnderlineNone
.ColorIndex = wdAuto
End With
cnt = cnt + 1
Next i
Debug.Print "Removed " & cnt & " link(s)"
End Sub
Sub RemoveFields()
Dim cnt As Long, i As Long
cnt = 0
For i = ActiveDocument.Fields.Count To 1 Step -1
ActiveDocument.Fields(i).Unlink
cnt = cnt + 1
Next i
Debug.Print "Removed " & cnt & " field(s)"
End Sub
答案 0 :(得分:0)
我最终在这个问题的选定答案中找到了我答案的提示:vbscript: replace text in activedocument with hyperlink。
基本上,Instr()
与Word的WYSIWYG功能不兼容,但Find
方法会选择适当的范围。无需删除超链接,也不必担心其他神秘隐藏文本。
代码如下:
Sub FixHighlightedText()
Dim objDoc As Document
Dim objRange As Range
Dim startPos As Long
Dim regex As Object
Dim matches
Set objDoc = ActiveDocument
Set objRange = objDoc.Range
startPos = 1
Set regex = CreateObject("VBScript.RegExp")
With regex
.Pattern = "((\([a-zA-Z]*?[-]?Time:.*?\})[a-zA-Z0-9]{0,3})"
.Global = True
End With
If regex.test(objRange.Text) Then
Set matches = regex.Execute(objRange.Text)
Debug.Print "Document has " & matches.Count & " matches"
Debug.Print "Document range is " & objRange.Start & " to " & objRange.End
Debug.Print "FirstIndex = " & matches(0).FirstIndex
For Each match In matches
Set objRange = objDoc.Range(startPos, objDoc.Content.End)
With objRange.Find
.Text = match.submatches(0)
.MatchWholeWord = True
.MatchCase = True
.Wrap = wdFindStop
.Execute
End With
startPos = objRange.End
Debug.Print "Match starts at " & objRange.Start & " and ends at " & objRange.End
Debug.Print " match0 text = " & match.submatches(0)
Debug.Print " subrange text = " & objRange.Text
Next match
Else
Debug.Print "No regex matches"
End If
End Sub