使用VBA发出计数字频率:相同的数据,不同的数字

时间:2018-04-20 14:45:44

标签: arrays vba excel-vba access-vba excel

我在VBA中制作了两个不同的脚本来计算CSV中包含的单词的频率。这两个脚本运行良好,但每个单词的数字都不同,我不知道为什么。以下是导致出现差异的时刻的一些步骤

脚本1:

Sub Dict_Array_1()

Dim Wb As Workbook, Wb1 As Workbook
Dim Ws As Worksheet, Ws1 As Worksheet
Dim Fd As Office.FileDialog
Dim StrFile As String
Dim i As Long, a As Long, LastR As Long
Dim Arr() As Variant
Dim Ban_() As String, T As String
Dim Ban As Object, Dict As Object
Dim Carac As Variant, w As Variant, Key As Variant 

Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Application.DisplayStatusBar = False
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
Application.DisplayAlerts = False

Set Wb = ActiveWorkbook
Set Ws = Wb.ActiveSheet


'---------- CSV ---------------------------------------------------------------------------------------------------------------

Set Fd = Application.FileDialog(msoFileDialogFilePicker)
With Fd
  .AllowMultiSelect = False
  .Title = "Select doc"
  .Filters.Clear
  .Filters.Add "Doc CSV (*.csv)", "*.csv"

    If .Show Then

        On Error GoTo ErrOpen 'ignore this
        Set Wb1 = Workbooks.Open(.SelectedItems(1), ReadOnly:=True, Local:=False) 
        On Error GoTo 0

        Set Ws1 = Wb1.Sheets(1)
        With Ws1
            LastR = .Cells(.Rows.Count, "S").End(xlUp).Row 

            Arr = .Range(Cells(1, 19), Cells(LastR, 19)).Value2 
        End With

        Wb1.Close 0
        Set Wb1 = Nothing
        Set Ws1 = Nothing
    Else
        Exit Sub
    End If
End With

'---------------------------------------- COUNT ----------------------------------------------------------------------------------------------------
'Array with words i want to ban
Ban_ = Split("word1,word2,word3,etc", ",")

'Array with caract i want to ban
Carac = Array(".", ",", ";", ":", "!", "#", "$", "%", "&", "(", ")", "- ", "_", "--", "+", _
                            "=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*", ">>", "»", "«")

Set Ban = CreateObject("Scripting.Dictionary") 'need late binding
Ban.CompareMode = vbTextCompare 'case insensitive
For i = 0 To UBound(Ban_)
    Ban.Add Ban_(i), 1
Next i
Erase Ban_

'Dict to count words
Set Dict = CreateObject("Scripting.Dictionary")
Dict.CompareMode = vbTextCompare 'case insensitive
For a = 1 To UBound(Arr, 1) 
    If Not IsError(Arr(a, 1)) 
        T = Arr(a, 1)
        For i = 0 To UBound(Carac)
            T = Replace(T, Carac(i), "", , , vbTextCompare) 
        Next i
        T = Application.Trim(T) 


        For Each w In Split(T, " ")
            If Not Ban.exists(w) Then
                If Not Dict.exists(w) Then
                    Dict.Add w, 1
                Else
                    Dict.Item(w) = Dict.Item(w) + 1 
                End If
            End If
        Next w
    End If
Next a
Exit Sub

Erase Arr
Erase Carac
Set Ban = Nothing

脚本2基本相同,唯一的区别是我以另一种方式访问​​.CSV:

Sub Dict_ADODB()
Dim Wb As Workbook, Wb1 As Workbook
Dim Ws As Worksheet, Ws1 As Worksheet
Dim Fd As Office.FileDialog
Dim StrFile As String
Dim i As Long, a As Long, LastR As Long
Dim Arr() As Variant
Dim Ban_() As String, T As String
Dim Ban As Object, Dict As Object
Dim Carac As Variant, w As Variant, Key As Variant 
Dim ObjC As Object, ObjR As Object 'Object Connection / Object Recordset
Const adOpenStatic = 3
Const adLockOptimistic = 3

Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Application.DisplayStatusBar = False
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
Application.DisplayAlerts = False

Set Wb = ActiveWorkbook
Set Ws = Wb.ActiveSheet


'---------- CSV ---------------------------------------------------------------------------------------------------------------

Set Fd = Application.FileDialog(msoFileDialogFilePicker)
With Fd
  .AllowMultiSelect = False
  .Title = "Select doc"
  .Filters.Clear
  .Filters.Add "Doc CSV (*.csv)", "*.csv"

    If .Show Then
        '----------- ADODB ---
        Set ObjC = CreateObject("ADODB.Connection")
        Set ObjR = CreateObject("ADODB.RecordSet")

        On Error GoTo ErrOpen 
        ObjC.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
                  "Data Source=" & .InitialFileName & ";" & _
                  "Extended Properties=""text;HDR=YES;FMT=Delimited;CharacterSet=65001""" 
        On Error GoTo 0
        'I just need one column
        ObjR.Open "SELECT Message FROM " & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")) & _
                    " WHERE Message IS NOT NULL", _
          ObjC, adOpenStatic, adLockOptimistic
        Arr = ObjR.GetRows() 

        ObjR.Close
        ObjC.Close
        Set ObjR = Nothing
        Set ObjC = Nothing
    Else
        Exit Sub
    End If
End With

'---------------------------------------- COUNT ----------------------------------------------------------------------------------------------------
'Array with word I don't need
Ban_ = Split("word1,word2", ",")

Carac = Array(".", ",", ";", ":", "!", "#", "$", "%", "&", "(", ")", "- ", "_", "--", "+", _
                            "=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*", ">>", "»", "«")

Set Ban = CreateObject("Scripting.Dictionary") 
Ban.CompareMode = vbTextCompare 
For i = 0 To UBound(Ban_)
    Ban.Add Ban_(i), 1
Next i
Erase Ban_

Set Dict = CreateObject("Scripting.Dictionary")
Dict.CompareMode = vbTextCompare 'case insensitive
For a = 0 To UBound(Arr, 2) 
    If Not IsError(Arr(0, a)) Then 
        T = Arr(0, a)
        For i = 0 To UBound(Carac)
            T = Replace(T, Carac(i), "", , , vbTextCompare) 
        Next i
        T = Application.Trim(T) 

        For Each w In Split(T, " ")
            If Not Ban.exists(w) Then
                If Not Dict.exists(w) Then
                    Dict.Add w, 1
                Else
                    Dict.Item(w) = Dict.Item(w) + 1 
                End If
            End If
        Next w
    End If
Next a

Erase Arr
Erase Carac
Set Ban = Nothing
Exit Sub

你走了。当我做dict.count时,我确实发现条目的总数是不同的,这只是部分解释为使用" WHERE消息IS NOT NULL"。任何想法为什么会非常感激!

2 个答案:

答案 0 :(得分:2)

查看正在发生的事情的最佳情况是在此行写一些日志:

Dict.Add w, 1

例如,如果值最多为200,则写:

Dim cnt as long
Dict.Add w, 1
cnt = cnt + 1
Debug.Print cnt, w

如果值大于200,那么只有最后200个会显示在即时窗口上,因此对您没有多大帮助。您可以使用日志构建一个String,并在记事本中使用相同的字符串打印String。

Dim cnt       as Long
Dim logString as String
Dict.Add w, 1
cnt = cnt + 1
logString = logString & VbCrLF & cnt, w

最后CreateLogFile logString

Sub CreateLogFile(Optional strPrint As String)

    Dim fs                      As Object
    Dim obj_text                As Object
    Dim str_filename            As String
    Dim str_new_file            As String
    Dim str_shell               As String

    str_new_file = "\tests_info\"

    str_filename = ThisWorkbook.Path & str_new_file
    If Dir(ThisWorkbook.Path & str_new_file, vbDirectory) = vbNullString Then
         MkDir ThisWorkbook.Path & str_new_file
    End If

    Set fs = CreateObject("Scripting.FileSystemObject")
    Set obj_text = fs.CreateTextFile(str_filename & "\sometext.txt", True)

    obj_text.writeline (strPrint)
    obj_text.Close

    str_shell = "C:\WINDOWS\notepad.exe "
    str_shell = str_shell & str_filename & "\sometext.txt"
    Shell str_shell

End Sub

答案 1 :(得分:0)

好吧,使用Schema.ini似乎解决了我的问题。文档中不清楚的是,应该为CSV中的每一列设置“colX = Y Type”,直到他想要选择的那个(起初我只设置“Col19 = Message”但是因为之前的列而失败了哪里没有设置......)。

我正在为感兴趣的人分享代码的相关部分(Excel 2010 / X86版本):

  Set fs = CreateObject("Scripting.FileSystemObject")
  Set obj_text = fs.CreateTextFile(.InitialFileName & "\Schema.ini", True) 
  obj_text.write ("[" & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")) & "]" & vbNewLine & _
                  "ColNameHeader=False" & vbNewLine & _
                  "CharacterSet=65001" & vbNewLine & _
                  "Format=CSVDelimited" & vbNewLine & _
                  "DecimalSymbol=." & vbNewLine & _
                  "Col1=1 Text" & vbNewLine & _
                  "Col2=2 Text" & vbNewLine & _
                  "Col3=3 Text" & vbNewLine & _
                  "Col4=4 Text" & vbNewLine & _
                  "Col5=5 Text" & vbNewLine & _
                  "Col6=6 Text" & vbNewLine & _
                  "Col7=7 Text" & vbNewLine & _
                  "Col8=8 Text" & vbNewLine & _
                  "Col9=9 Text" & vbNewLine & _
                  "Col10=10 Text" & vbNewLine & _
                  "Col11=11 Text" & vbNewLine & _
                  "Col12=12 Text" & vbNewLine & _
                  "Col13=13 Text" & vbNewLine & _
                  "Col14=14 Text" & vbNewLine & _
                  "Col15=15 Text" & vbNewLine & _
                  "Col16=16 Text" & vbNewLine & _
                  "Col17=17 Text" & vbNewLine & _
                  "Col18=18 Text" & vbNewLine & _
                  "Col19=GOODONE Memo") 'set all the previous cols until the one I need!
  obj_text.Close

  Set ObjC = CreateObject("ADODB.Connection")
  Set ObjR = CreateObject("ADODB.RecordSet")

  ObjC.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
            "Data Source=" & .InitialFileName & ";" & _
            "Extended Properties=""text;HDR=No;"""

  ObjR.Open "SELECT GOODONE FROM " & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")), _
    ObjC, 0, 1 

  Arr = ObjR.GetRows()