我有一个大型电子表格,我想对特定列执行单词计数,以找出最常用的单词。此列包含大量数据和文本。
例如,“员工爬梯子从顶层货架上取回商品。梯子开始摇摆,员工失去平衡而跌倒。右腿受伤”。这样有大约1000种不同的记录。我希望使用数据透视表来确定本专栏中所有单元格中最常用的单词。
我不知道该怎么做。任何人都可以协助如何做到这一点吗?
目前使用以下代码:
Option Explicit
Sub MakeWordList()
Dim InputSheet As Worksheet
Dim WordListSheet As Worksheet
Dim PuncChars As Variant, x As Variant
Dim i As Long, r As Long
Dim txt As String
Dim wordCnt As Long
Dim AllWords As Range
Dim PC As PivotCache
Dim PT As PivotTable
Application.ScreenUpdating = False
Set InputSheet = ActiveSheet
Set WordListSheet = Worksheets.Add(after:=Worksheets(Sheets.Count))
WordListSheet.Range("A1") = "All Words"
WordListSheet.Range("A1").Font.Bold = True
InputSheet.Activate
wordCnt = 2
PuncChars = Array(".", ",", ";", ":", "'", "!", "#", _
"$", "%", "&", "(", ")", " - ", "_", "--", "+", _
"=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*")
r = 1
' Loop until blank cell is encountered
Do While Cells(r, 1) <> ""
' covert to UPPERCASE
txt = UCase(Cells(r, 1))
' Remove punctuation
For i = 0 To UBound(PuncChars)
txt = Replace(txt, PuncChars(i), "")
Next i
' Remove excess spaces
txt = WorksheetFunction.Trim(txt)
' Extract the words
x = Split(txt)
For i = 0 To UBound(x)
WordListSheet.Cells(wordCnt, 1) = x(i)
wordCnt = wordCnt + 1
Next i
r = r + 1
Loop
' Create pivot table
WordListSheet.Activate
Set AllWords = Range("A1").CurrentRegion
Set PC = ActiveWorkbook.PivotCaches.Add _
(SourceType:=xlDatabase, _
SourceData:=AllWords)
Set PT = PC.CreatePivotTable _
(TableDestination:=Range("C1"), _
TableName:="PivotTable1")
With PT
.AddDataField .PivotFields("All Words")
.PivotFields("All Words").Orientation = xlRowField
End With
End Sub
答案 0 :(得分:5)
这是一个快速而肮脏的宏(我今天感觉更有帮助)。把它放在你的工作簿模块中。注意:我假设您将激活的工作表是包含A列中所有文本的工作表。
Sub Test()
Dim lastRow&, i&, tempLastRow&
Dim rawWS As Worksheet, tempWS As Worksheet
Set rawWS = ActiveSheet
Set tempWS = Sheets.Add
tempWS.Name = "Temp"
rawWS.Activate
'tempWS.Columns(1).Value = rawWS.Columns(1).Value
tempLastRow = 1
With rawWS
.Columns("A:A").TextToColumns Destination:=Range("A1"), DataType:=xlDelimited, _
TextQualifier:=xlDoubleQuote, ConsecutiveDelimiter:=True, Tab:=True, _
Semicolon:=False, Comma:=False, Space:=True, Other:=False, TrailingMinusNumbers:=True
lastRow = .Cells(.Rows.Count, 1).End(xlUp).Row
For i = lastRow To 1 Step -1
.Rows(i).EntireRow.Copy
tempWS.Range("A" & tempLastRow).PasteSpecial Paste:=xlPasteAll, Transpose:=True
' tempWS.Range ("A" & tempLastRow)
tempLastRow = tempWS.Cells(tempWS.Rows.Count, 1).End(xlUp).Row + 1
Next i
Application.CutCopyMode = False
End With
With tempWS
' Now, let's get unique words and run a count
.Range("A:A").Copy .Range("C:C")
.Range("C:C").RemoveDuplicates Columns:=1, Header:=xlNo
tempLastRow = .Cells(.Rows.Count, 3).End(xlUp).Row
.Range(.Cells(1, 4), .Cells(tempLastRow, 4)).FormulaR1C1 = "=COUNTIF(C[-3],RC[-1])"
.Sort.SortFields.Clear
.Sort.SortFields.Add Key:=Range("D1:D1048576") _
, SortOn:=xlSortOnValues, Order:=xlDescending, DataOption:=xlSortNormal
With .Sort
.SetRange Range("C1:D1048576")
.Header = xlGuess
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
End With
End Sub
基本上,它会创建一个新工作表,计算所有单个单词,并将单词(和计数)放在一列中,按最常用的顺序排序。您可以根据需要进行调整。
注意:我在添加代码之前就这样做了。它不会创建一个数据透视表,但根据我的理解,如果你只需要最常用的单词,那么数据透视表会有点过分。但是,如果您需要任何修改或更改,请与我们联系!
答案 1 :(得分:4)
以下是显示每个字及其显示次数的例程(使用Split
和Collection
s)
用法:CountTheWordsInRange Range("A1:A4")
Sub CountTheWordsInRange(RangeToCheck As Range)
Dim wordList As New Collection
Dim keyList As New Collection
Dim c
For Each c In RangeToCheck
Dim words As Variant
words = Split(c, " ") 'Pick a delimiter
For Each w In words
Dim temp
temp = -1
On Error Resume Next
temp = wordList(w)
On Error GoTo 0
If temp = -1 Then
wordList.Add 1, Key:=w
keyList.Add w, Key:=w
Else
wordList.Remove (w)
keyList.Remove (w)
wordList.Add temp + 1, w
keyList.Add w, Key:=w
End If
Next w
Next c
'Here we can display the word counts
'KeyList is a collection that contains each word
'WordList is a collection that contains each amount
Dim x
For x = 1 To wordList.Count
With Sheets("Sheet1")
.Cells(x, "E").Value = keyList(x) 'Display Word in column "E"
.Cells(x, "F").Value = wordList(x) 'Display Count in column "F"
End With
Next x
End Sub
结果: