我在VBA中制作了两个不同的脚本来计算CSV中包含的单词的频率。这两个脚本运行良好,但每个单词的数字都不同,我不知道为什么。以下是导致出现差异的时刻的一些步骤
脚本1:
Sub Dict_Array_1()
Dim Wb As Workbook, Wb1 As Workbook
Dim Ws As Worksheet, Ws1 As Worksheet
Dim Fd As Office.FileDialog
Dim StrFile As String
Dim i As Long, a As Long, LastR As Long
Dim Arr() As Variant
Dim Ban_() As String, T As String
Dim Ban As Object, Dict As Object
Dim Carac As Variant, w As Variant, Key As Variant
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Application.DisplayStatusBar = False
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
Application.DisplayAlerts = False
Set Wb = ActiveWorkbook
Set Ws = Wb.ActiveSheet
'---------- CSV ---------------------------------------------------------------------------------------------------------------
Set Fd = Application.FileDialog(msoFileDialogFilePicker)
With Fd
.AllowMultiSelect = False
.Title = "Select doc"
.Filters.Clear
.Filters.Add "Doc CSV (*.csv)", "*.csv"
If .Show Then
On Error GoTo ErrOpen 'ignore this
Set Wb1 = Workbooks.Open(.SelectedItems(1), ReadOnly:=True, Local:=False)
On Error GoTo 0
Set Ws1 = Wb1.Sheets(1)
With Ws1
LastR = .Cells(.Rows.Count, "S").End(xlUp).Row
Arr = .Range(Cells(1, 19), Cells(LastR, 19)).Value2
End With
Wb1.Close 0
Set Wb1 = Nothing
Set Ws1 = Nothing
Else
Exit Sub
End If
End With
'---------------------------------------- COUNT ----------------------------------------------------------------------------------------------------
'Array with words i want to ban
Ban_ = Split("word1,word2,word3,etc", ",")
'Array with caract i want to ban
Carac = Array(".", ",", ";", ":", "!", "#", "$", "%", "&", "(", ")", "- ", "_", "--", "+", _
"=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*", ">>", "»", "«")
Set Ban = CreateObject("Scripting.Dictionary") 'need late binding
Ban.CompareMode = vbTextCompare 'case insensitive
For i = 0 To UBound(Ban_)
Ban.Add Ban_(i), 1
Next i
Erase Ban_
'Dict to count words
Set Dict = CreateObject("Scripting.Dictionary")
Dict.CompareMode = vbTextCompare 'case insensitive
For a = 1 To UBound(Arr, 1)
If Not IsError(Arr(a, 1))
T = Arr(a, 1)
For i = 0 To UBound(Carac)
T = Replace(T, Carac(i), "", , , vbTextCompare)
Next i
T = Application.Trim(T)
For Each w In Split(T, " ")
If Not Ban.exists(w) Then
If Not Dict.exists(w) Then
Dict.Add w, 1
Else
Dict.Item(w) = Dict.Item(w) + 1
End If
End If
Next w
End If
Next a
Exit Sub
Erase Arr
Erase Carac
Set Ban = Nothing
脚本2基本相同,唯一的区别是我以另一种方式访问.CSV:
Sub Dict_ADODB()
Dim Wb As Workbook, Wb1 As Workbook
Dim Ws As Worksheet, Ws1 As Worksheet
Dim Fd As Office.FileDialog
Dim StrFile As String
Dim i As Long, a As Long, LastR As Long
Dim Arr() As Variant
Dim Ban_() As String, T As String
Dim Ban As Object, Dict As Object
Dim Carac As Variant, w As Variant, Key As Variant
Dim ObjC As Object, ObjR As Object 'Object Connection / Object Recordset
Const adOpenStatic = 3
Const adLockOptimistic = 3
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Application.DisplayStatusBar = False
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
Application.DisplayAlerts = False
Set Wb = ActiveWorkbook
Set Ws = Wb.ActiveSheet
'---------- CSV ---------------------------------------------------------------------------------------------------------------
Set Fd = Application.FileDialog(msoFileDialogFilePicker)
With Fd
.AllowMultiSelect = False
.Title = "Select doc"
.Filters.Clear
.Filters.Add "Doc CSV (*.csv)", "*.csv"
If .Show Then
'----------- ADODB ---
Set ObjC = CreateObject("ADODB.Connection")
Set ObjR = CreateObject("ADODB.RecordSet")
On Error GoTo ErrOpen
ObjC.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
"Data Source=" & .InitialFileName & ";" & _
"Extended Properties=""text;HDR=YES;FMT=Delimited;CharacterSet=65001"""
On Error GoTo 0
'I just need one column
ObjR.Open "SELECT Message FROM " & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")) & _
" WHERE Message IS NOT NULL", _
ObjC, adOpenStatic, adLockOptimistic
Arr = ObjR.GetRows()
ObjR.Close
ObjC.Close
Set ObjR = Nothing
Set ObjC = Nothing
Else
Exit Sub
End If
End With
'---------------------------------------- COUNT ----------------------------------------------------------------------------------------------------
'Array with word I don't need
Ban_ = Split("word1,word2", ",")
Carac = Array(".", ",", ";", ":", "!", "#", "$", "%", "&", "(", ")", "- ", "_", "--", "+", _
"=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*", ">>", "»", "«")
Set Ban = CreateObject("Scripting.Dictionary")
Ban.CompareMode = vbTextCompare
For i = 0 To UBound(Ban_)
Ban.Add Ban_(i), 1
Next i
Erase Ban_
Set Dict = CreateObject("Scripting.Dictionary")
Dict.CompareMode = vbTextCompare 'case insensitive
For a = 0 To UBound(Arr, 2)
If Not IsError(Arr(0, a)) Then
T = Arr(0, a)
For i = 0 To UBound(Carac)
T = Replace(T, Carac(i), "", , , vbTextCompare)
Next i
T = Application.Trim(T)
For Each w In Split(T, " ")
If Not Ban.exists(w) Then
If Not Dict.exists(w) Then
Dict.Add w, 1
Else
Dict.Item(w) = Dict.Item(w) + 1
End If
End If
Next w
End If
Next a
Erase Arr
Erase Carac
Set Ban = Nothing
Exit Sub
你走了。当我做dict.count时,我确实发现条目的总数是不同的,这只是部分解释为使用" WHERE消息IS NOT NULL"。任何想法为什么会非常感激!
答案 0 :(得分:2)
查看正在发生的事情的最佳情况是在此行写一些日志:
Dict.Add w, 1
例如,如果值最多为200,则写:
Dim cnt as long
Dict.Add w, 1
cnt = cnt + 1
Debug.Print cnt, w
如果值大于200,那么只有最后200个会显示在即时窗口上,因此对您没有多大帮助。您可以使用日志构建一个String,并在记事本中使用相同的字符串打印String。
Dim cnt as Long
Dim logString as String
Dict.Add w, 1
cnt = cnt + 1
logString = logString & VbCrLF & cnt, w
最后CreateLogFile logString
:
Sub CreateLogFile(Optional strPrint As String)
Dim fs As Object
Dim obj_text As Object
Dim str_filename As String
Dim str_new_file As String
Dim str_shell As String
str_new_file = "\tests_info\"
str_filename = ThisWorkbook.Path & str_new_file
If Dir(ThisWorkbook.Path & str_new_file, vbDirectory) = vbNullString Then
MkDir ThisWorkbook.Path & str_new_file
End If
Set fs = CreateObject("Scripting.FileSystemObject")
Set obj_text = fs.CreateTextFile(str_filename & "\sometext.txt", True)
obj_text.writeline (strPrint)
obj_text.Close
str_shell = "C:\WINDOWS\notepad.exe "
str_shell = str_shell & str_filename & "\sometext.txt"
Shell str_shell
End Sub
答案 1 :(得分:0)
好吧,使用Schema.ini似乎解决了我的问题。文档中不清楚的是,应该为CSV中的每一列设置“colX = Y Type”,直到他想要选择的那个(起初我只设置“Col19 = Message”但是因为之前的列而失败了哪里没有设置......)。
我正在为感兴趣的人分享代码的相关部分(Excel 2010 / X86版本):
Set fs = CreateObject("Scripting.FileSystemObject")
Set obj_text = fs.CreateTextFile(.InitialFileName & "\Schema.ini", True)
obj_text.write ("[" & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")) & "]" & vbNewLine & _
"ColNameHeader=False" & vbNewLine & _
"CharacterSet=65001" & vbNewLine & _
"Format=CSVDelimited" & vbNewLine & _
"DecimalSymbol=." & vbNewLine & _
"Col1=1 Text" & vbNewLine & _
"Col2=2 Text" & vbNewLine & _
"Col3=3 Text" & vbNewLine & _
"Col4=4 Text" & vbNewLine & _
"Col5=5 Text" & vbNewLine & _
"Col6=6 Text" & vbNewLine & _
"Col7=7 Text" & vbNewLine & _
"Col8=8 Text" & vbNewLine & _
"Col9=9 Text" & vbNewLine & _
"Col10=10 Text" & vbNewLine & _
"Col11=11 Text" & vbNewLine & _
"Col12=12 Text" & vbNewLine & _
"Col13=13 Text" & vbNewLine & _
"Col14=14 Text" & vbNewLine & _
"Col15=15 Text" & vbNewLine & _
"Col16=16 Text" & vbNewLine & _
"Col17=17 Text" & vbNewLine & _
"Col18=18 Text" & vbNewLine & _
"Col19=GOODONE Memo") 'set all the previous cols until the one I need!
obj_text.Close
Set ObjC = CreateObject("ADODB.Connection")
Set ObjR = CreateObject("ADODB.RecordSet")
ObjC.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
"Data Source=" & .InitialFileName & ";" & _
"Extended Properties=""text;HDR=No;"""
ObjR.Open "SELECT GOODONE FROM " & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")), _
ObjC, 0, 1
Arr = ObjR.GetRows()