我需要编写一个MS Word宏来计算给定文档中每个单词的出现次数并打印出类似的列表。 我做了宏并且它可以工作,但它是如此懒散,需要几个小时来获得60000字的文档的结果。 你能否就如何让宏运行得更快给我一些建议/建议?
(我在WORD VBA Count Word Occurrences检查了一个类似的问题,但仍然没有得到它如何加速并需要我的宏进行审核)。谢谢。
Private Type WordStatData
WordText As String
WordCount As Integer
End Type
Option Base 1
'Check if the word is valid
Private Function IsValidWord(SomeString As String) As Boolean
Dim Retval As Boolean
Retval = True
If Not (InStr(SomeString, " ") = 0) Then Retval = False
If Not (InStr(SomeString, ".") = 0) Then Retval = False
If Not (InStr(SomeString, ",") = 0) Then Retval = False
If Not InStr(SomeString, "0") = 0 Then Retval = False
If Not InStr(SomeString, "1") = 0 Then Retval = False
If Not InStr(SomeString, "2") = 0 Then Retval = False
If Not InStr(SomeString, "3") = 0 Then Retval = False
If Not InStr(SomeString, "4") = 0 Then Retval = False
If Not InStr(SomeString, "5") = 0 Then Retval = False
If Not InStr(SomeString, "6") = 0 Then Retval = False
If Not InStr(SomeString, "7") = 0 Then Retval = False
If Not InStr(SomeString, "8") = 0 Then Retval = False
If Not InStr(SomeString, "9") = 0 Then Retval = False
IsValidWord = Retval
End Function
Private Sub CommandButton1_Click()
SpanishLCID = 3082 'The source text is in Spanish
ListBox1.Clear
Dim WordsTotal As Long
WordsTotal = ActiveDocument.Words.Count
TextBox1.Text = Str(WordsTotal)
Dim Wordfound As Boolean
Dim NewWord As String
Dim MyData() As WordStatData
ReDim Preserve MyData(1)
NewWord = ""
For i = 1 To WordsTotal
NewWord = Trim(StrConv(Trim(ActiveDocument.Words(i)), vbLowerCase, SpanishLCID))
'Check if the word is in the list
If IsValidWord(NewWord) Then
Wordfound = False
For j = 1 To UBound(MyData)
If StrComp(MyData(j).WordText, NewWord) = 0 Then
Wordfound = True: Exit For
End If
Next j
If Wordfound Then
MyData(j).WordCount = MyData(j).WordCount + 1
Else
ReDim Preserve MyData(UBound(MyData) + 1)
MyData(UBound(MyData)).WordText = NewWord
MyData(UBound(MyData)).WordCount = 1
End If
End If
Next i
'Printing out the word list
For i = 1 To UBound(MyData)
ListBox1.AddItem (MyData(i).WordText & "=" & Str(MyData(i).WordCount))
Next i
End Sub
答案 0 :(得分:4)
添加对 Microsoft Scripting Runtime 的引用(工具 - > 引用... )。然后使用以下内容:
Private Sub CommandButton1_Click()
Const SpanishLCID = 3082
Dim dict As New Scripting.Dictionary, word As Variant, fixedWord As String
Dim key As Variant
dict.CompareMode = SpanishLCID
For Each word In ActiveDocument.Words
fixedWord = Trim(StrConv(Trim(word), vbLowerCase, SpanishLCID))
If Not dict.Exists(fixedWord) Then
dict(fixedWord) = 1
Else
dict(fixedWord) = dict(fixedWord) + 1
End If
Next
ListBox1.Clear
For Each key In dict.Keys
ListBox1.AddItem key & "=" & dict(key)
Next
End Sub
NB。 Word将每个标点符号或段落视为新单词。建议使用不应添加到字典中的字符串指定另一个字典或集合,并在添加到字典之前使用.Exists
测试这些字符串。
更简洁的IsValidWord
版本,没有正则表达式:
Function IsValidWord(s As String) As Boolean
Const validChars As String = "abcdefghijklmnopqrstuvwxyz"
Dim i As Integer, char As String * 1
For i = 1 To Len(s)
char = Mid(s, i, 1)
If InStr(1, validChars, char, vbTextCompare) = 0 Then Exit Function
Next
IsValidWord = True
End Function
并使用regular expressions(添加对 Microsoft VBScript正则表达式5.5 的引用):
Dim regex As RegExp
Function IsValidWord2(s As String) As Boolean
If regex Is Nothing Then
Set regex = New RegExp
regex.Pattern = "[^a-z]"
regex.IgnoreCase = True
End If
IsValidWord2 = Not regex.Test(s)
End Function
并使用带替换的正则表达式:
Function GetValidWord(s As String) As String
'GetValidWord("Introduction.......3") will return "Introduction"
If regex2 Is Nothing Then
Set regex2 = New RegExp
regex2.Pattern = "[^a-z]"
regex2.Global = True
regex2.IgnoreCase = True
End If
GetValidWord = regex2.Replace(s, "")
End Function
您将按如下方式使用它:
For Each word In ActiveDocument.Words
fixedWord = Trim(StrConv(Trim(word), vbLowerCase, SpanishLCID))
fixedWord = GetValidWord(fixedWord)
If Not dict.Exists(fixedWord) Then
注意:您可以将语言转换和Trim
合并到GetValidWord
。