从文本文件中读取非重复值到集合中

时间:2015-11-12 15:43:02

标签: excel-vba collections duplicates text-files userform

我无法使用我正在阅读的大文本文件中使用唯一值填充集合。我尝试将所有值读入集合,然后删除重复项,但我用来执行此操作的代码需要很长时间才能运行。我一直在阅读有关使用唯一“密钥”识别集合对象的内容,但我不确定如何将其合并到我拥有的代码中以及我想要完成的内容中。以下是我目前的代码:

Option Explicit
Private Sub UserForm_Initialize()
    'Declare variables
    Const CMMData As String = "\\ATSTORE01\CMMData\21064D\21064D-OP400.dat"
    Dim strSN As New Collection
    Dim strSet As New Collection
    Dim strUniqueSet As New Collection
    Dim strFF As New Collection
    Dim strVHCC As New Collection
    Dim strVHCCMID As New Collection
    Dim strVHCVMID As New Collection
    Dim strVHCV As New Collection
    Dim strHWCC As New Collection
    Dim strHWCCMID As New Collection
    Dim strHWCVMID As New Collection
    Dim strHWCV As New Collection
    Dim LineData As String
    Dim SplitData() As String
    Dim LineIter As Long
    Dim UniqueSet As Variant
    Dim UniqueSet1 As Variant
    'Populate Set Number Listbox
    LineIter = 0
    With New Scripting.FileSystemObject
        With .OpenTextFile(CMMData, ForReading)
            Do Until .AtEndOfStream
                LineIter = LineIter + 1
                If LineIter <= 4 Then
                    .SkipLine
                Else
                    LineData = .ReadLine
                    SplitData = Split(LineData, ",")
                    'Extracting Serial Number
                    strSN.Add SplitData(0)
                    'Extracting Set Number
                    strSet.Add SplitData(1)
                    'Extracting Unique Set Number
                    strUniqueSet.Add SplitData(1)  'This is where I'd like to very cleanly extract only unique, non-duplicate set numbers into this particular collection.
                    'Extracting Final Flow Area
                    strFF.Add SplitData(14)
                    'Extracting /V/ To Hook CC
                    strVHCC.Add SplitData(96)
                    'Extracting /V/ To Hook CC Mid
                    strVHCCMID.Add SplitData(97)
                    'Extracting /V/ To Hook CV Mid
                    strVHCVMID.Add SplitData(98)
                    'Extracting /V/ To Hook CV
                    strVHCV.Add SplitData(99)
                    'Extracting Hook Width CC
                    strVHCV.Add SplitData(134)
                    'Extracting Hook Width CC Mid
                    strVHCV.Add SplitData(135)
                    'Extracting Hook Width CV Mid
                    strVHCV.Add SplitData(136)
                    'Extracting Hook Width CV
                    strVHCV.Add SplitData(137)
                    'Set_Select.AddItem SplitData(1)
                End If
            Loop
            .Close
        End With
        'Below is the code I was using to remove the duplicate entries from the strUniqueSet collection
        For UniqueSet = strUniqueSet.Count To 2 Step -1
            For UniqueSet1 = (UniqueSet - 1) To 1 Step -1
                On Error GoTo DisplayUniqueSet
                If strUniqueSet.Item(UniqueSet) = strUniqueSet.Item(UniqueSet1) Then
                    strUniqueSet.Remove (UniqueSet)
                Else
                    Set_Select.AddItem strUniqueSet(UniqueSet)
                End If
            Next UniqueSet1
        Next UniqueSet
    End With
    Exit Sub
DisplayUniqueSet:
    MsgBox UniqueSet
End Sub

此部分代码的最终目的是使用strUniqueSet集合中的值填充列表框。然后,用户将选择一个非重复的集合编号,然后程序将从其他集合中提取与所选集合编号相关的所有值。

我非常感谢帮助。

2 个答案:

答案 0 :(得分:0)

根据我的经验,集合和词典可能很慢,特别是如果您在同一代码中有多个对象。我建议将值加载到一个多维数组中,然后首先循环数组以查看值是否存在,并且仅在不存在的情况下才添加。然后,当用户通过再次循环数组选择非重复数字时,您可以从数组中获取信息。

见下面的代码。请注意,它将生成多个数组元素,这些元素基本上是当前写入的空白。

Option Explicit
Private Sub UserForm_Initialize()

    Const CMMData As String = "\\ATSTORE01\CMMData\21064D\21064D-OP400.dat"
    Dim LineData As String
    Dim SplitData() As String
    Dim LineIter As Long
    Dim UniqueSet As Variant
    Dim UniqueSet1 As Variant

    Dim myArray() As String
    ReDim myArray(10, 0)


    LineIter = 0
    With New Scripting.FileSystemObject
        With .OpenTextFile(CMMData, ForReading)
            Do Until .AtEndOfStream

                LineIter = LineIter + 1

                ReDim Preserve myArray(10, LineIter)

                If LineIter <= 4 Then
                    myArray(1,LineIter) = "empty" & LineIter
                    .SkipLine
                Else

                    LineData = .ReadLine
                    SplitData = Split(LineData, ",")

                    Dim x As Long, bFound As Boolean

                    bFound = False
                    For x = LBound(myArray) To UBound(myArray)

                        If myArray(1, x) = SplitData(1) Then 'look if Set already exists
                            bFound = True
                            Exit For
                        End If

                    Next

                    If Not bFound Then 'if its not in array already, then add it

                        myArray(0, LineIter) = SplitData(0)
                        myArray(1, LineIter) = SplitData(1)
                        myArray(2, LineIter) = SplitData(14)
                        myArray(3, LineIter) = SplitData(96)
                        myArray(4, LineIter) = SplitData(97)
                        myArray(5, LineIter) = SplitData(98)
                        myArray(6, LineIter) = SplitData(99)
                        myArray(7, LineIter) = SplitData(134)
                        myArray(8, LineIter) = SplitData(135)
                        myArray(9, LineIter) = SplitData(136)
                        myArray(10, LineIter) = SplitData(137)

                    Else

                        myArray(1, LineIter) = "empty" & LineIter

                    End If

                End If

            Loop
            .Close
        End With

    End With

End Sub

答案 1 :(得分:0)

研究过您的代码后,我认为集合或词典都不合适。我已经提供了一个答案,我将如何解决您的要求。如果你问的话,我会在馆藏和词典上添加一些内容,但我怀疑这个答案包含的内容足以供你学习。

首先我需要一些测试数据。从空工作表开始,我使用唯一值填充第1行到第10001行以及第1列到第155列。我将列B设置为重复值“A”到“Z”。我将该数据导出为名为“Import.csv”的CSV文件。

不要重新发明轮子。 Excel有一个非常适合导入CSV文件的例程,因此不需要在VBA中编写自己的例程。我很少使用CSV文件,因此不记得调用导入例程所需语句的VBA语法。我打开宏录制器,导入CSV文件(前4行除外)并关闭宏录制器。我整理了宏录制器的代码,以构成我日常工作的第一部分。

宏录制器创建语法正确的代码,但不是良好的练习代码。它不知道你的目标所以记录下你做的事情。我怀疑你有155列,你可能希望为某些列指定“常规”以外的格式。您必须使用您的数据重做手动导入,并按照我的方式整理代码。

为我录制的代码的开头是:

With ActiveSheet.QueryTables.Add(Connection:= _
    "TEXT;C:\Users\Admin\Desktop\Import.csv", Destination:=Range("A1"))
    .Name = "Import" 

我整理了这个以获得:

With WshtIn
.Cells.EntireRow.Delete         ' Delete existing content
  With .QueryTables.Add(Connection:="TEXT;" & ActiveWorkbook.Path & _
                                    "\Import.csv", Destination:=.Range("A1"))
    .Name = "DataIn"

最好避免使用ActiveSheet。我已经指定了一个Worksheet类型的变量WshtIn,并将其设置为我想要使用的工作表。

原始连接字符串TEXT;C:\Users\Admin\Desktop\Import.csv是单个文字,我已用表达式替换。

我几乎总是将我的工作簿和他们处理的文件放在同一个文件夹中。 ActiveWorkbook.Path为我提供了工作簿的文件夹。通过使用它作为我的文件夹名称,我可以将文件移动到新文件夹,代码仍然有效。

Destination:=Range("A1")依赖于ActiveSheet内的目的地。在开头添加句点以创建.Destination:=Range("A1")表示目标位于With WshtIn定义的工作表中。

最后,我已将.Name = "Import"替换为.Name = "DataIn",因为我不想要为CSV文件命名的工作表。

剩下的代码我保持不变,除了最后一个额外的End With。正如我所说,您必须使用适合您需求的代码替换我的导入代码。我建议您在打开宏录制器的情况下导入CSV文件。根据录制的代码启动一个新的宏并进行播放,直到您在查看我的代码的下一位之前获得宏来导入CSV。

您只需要此CSV文件的11列。所以我编写了一个循环,将这11列移动到一个新的工作表“DataKeep”。宏录制器不会循环,所以不会有任何帮助;你必须知道编码这个的相关语法。我使用数组来定义要移动的列。我相信我已正确指定了列,但您需要检查。可能最好将此代码添加到您的宏中并在继续之前正确使用。

工作表“DataKeep”仅包含您想要的数据。我导入它时丢弃了前4行,我只保留了感兴趣的列。在常规程序中,你会发现:

Data = .Range(…).Value

这将范围的内容作为二维数组加载到Data中。对于大多数数组,约定是将列作为第一维,将行作为第二维,以便可以使用ReDim Preserve来增加行数。但是,对于从工作表加载的数组或准备下载到工作表的数组,第一个维度用于行,第二个维度用于列。这很有用,因为它匹配单元格的语法:Cells(RowNumber, ColNumber).

在我的宏的末尾,我展示了如何通过显示前20行来访问数据。这不是您的代码中的集合列表,但我相信您会发现一个更方便的数组。

获取“设置”列的唯一值列表的最简单方法是使用高级自动过滤器。这是我不经常使用的东西,我的指尖没有正确的语法。当我创建一组唯一集合时,我使用宏录制器捕获必要的代码。我将该列表中的可见单元格作为数组复制到UniqueSets。我相信我的代码无需修改即可满足您的要求,但您可能希望尝试使用宏录制器创建此代码作为培训练习。

完成我的代码,根据需要更新代码,并研究我如何实现我的效果。代码中有更多说明和解释。尽可能回答问题但是,你越了解自己,你的发展就越快。

Option Explicit

  '   Constants allow you to name columns rather than use numbers or letters that
  ' may change. If the position of a column changes, amend the Const statement and
  ' the code is fully updated.  Searching code for the old column number so it can
  ' be updated to the new can be a nightmare.
  '   I have guessed names for the columns based on your code. Change as necessary.
  '   ColKeepSet is the only one I use.
  Const ColKeepSN As Long = 1
  Const ColKeepSet As Long = 2
  Const ColKeepFF As Long = 3
  Const ColKeepVHCC As Long = 4
  Const ColKeepVHCCMID As Long = 5
  Const ColKeepVHCVMID As Long = 6
  Const ColKeepVHCV1 As Long = 7
  Const ColKeepVHCV2 As Long = 8
  Const ColKeepVHCV3 As Long = 9
  Const ColKeepVHCV4 As Long = 10
  Const ColKeepVHCV5 As Long = 11
Sub Import()

  Dim ColInCrnt As Variant
  Dim ColKeepCrnt As Long
  Dim ColKeepLast As Long
  Dim ColWidths() As Long
  Dim Data As Variant
  Dim Headings As Variant
  Dim RngFilter As Range
  Dim RngUnique As Range
  Dim RowKeepCrnt As Long
  Dim RowKeepLast As Long
  Dim RowKeepSetLast As Long
  Dim RowUnqCrnt As Long
  Dim UniqueSets As Variant
  Dim WshtIn As Worksheet
  Dim WshtKeep As Worksheet

  ' Change the names of the worksheets as necessary
  Set WshtIn = Worksheets("DataIn")
  Set WshtKeep = Worksheets("DataKeep")

  ' Import the CSV file. Change "Import.csv" to your filename. Change folder if necessary.
  With WshtIn
    .Cells.EntireRow.Delete         ' Delete existing content
    With .QueryTables.Add(Connection:="TEXT;" & ActiveWorkbook.Path & "\Import.csv", Destination:=.Range("A1"))
      .Name = "DataIn"
      .FieldNames = True
      .RowNumbers = False
      .FillAdjacentFormulas = False
      .PreserveFormatting = True
      .RefreshOnFileOpen = False
      .RefreshStyle = xlInsertDeleteCells
      .SavePassword = False
      .SaveData = True
      .AdjustColumnWidth = True
      .RefreshPeriod = 0
      .TextFilePromptOnRefresh = False
      .TextFilePlatform = 850
      .TextFileStartRow = 5
      .TextFileParseType = xlDelimited
      .TextFileTextQualifier = xlTextQualifierDoubleQuote
      .TextFileConsecutiveDelimiter = False
      .TextFileTabDelimiter = False
      .TextFileSemicolonDelimiter = False
      .TextFileCommaDelimiter = True
      .TextFileSpaceDelimiter = False
      .TextFileColumnDataTypes = Array(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        .TextFileTrailingMinusNumbers = True
        .Refresh BackgroundQuery:=False
    End With
  End With

  ' Copy the required columns from worksheet "DataIn" to "DataKeep"
  WshtKeep.Rows.Delete      ' Discard any reviosu data
  ColKeepCrnt = 1
  For Each ColInCrnt In Array(1, 2, 15, 97, 98, 99, 100, 135, 136, 137, 138)
    WshtIn.Columns(ColInCrnt).Copy Destination:=WshtKeep.Cells(1, ColKeepCrnt)
    ColKeepCrnt = ColKeepCrnt + 1
  Next

  ' Delete contents of Worksheet "DataIn" which are no longer needed
  WshtIn.Rows.Delete

  With WshtKeep

    RowKeepSetLast = .Cells(Rows.Count, ColKeepSet).End(xlUp).Row
    Set RngFilter = .Range(.Cells(1, ColKeepSet), _
                           .Cells(RowKeepSetLast, ColKeepSet))

    .Columns(ColKeepSet).AutoFilter
    RngFilter.AdvancedFilter Action:=xlFilterInPlace, Unique:=True

    ' One copy of each unqiue set will be visible.  In addition row 1 will be visible
    ' because Excel assumes it is a header row.

    Set RngUnique = .Range(.Cells(2, ColKeepSet), _
                           .Cells(RowKeepSetLast, ColKeepSet)).SpecialCells(xlCellTypeVisible)

    Debug.Print RngUnique.Address
    UniqueSets = RngUnique.Value

    .Columns(ColKeepSet).AutoFilter     ' Clear

    '   There are various methods of finding the last used row and column. Above I have used
    ' .End(xlUp) which is the easiest method of finding the last row of a column. Your data
    ' is almost certainly rectangular so I could have assumed that the last row of the Set
    ' column is the last row of all columns. Since I have saved selected columns, I could
    ' have deduced the last column from that.  However, I have decided to show a different
    ' technique.
    '   Both of the following statements use Find to locate the last cell contaning a value.
    ' Both start the search "After" cell A1 and the search direction is "xlPrevious".
    ' The previous cell from A1 is the bottom, right cell so both searches got up and across
    ' until they find a cell with a value. In the first the search order is "xlByRows" and
    ' the second it is "xlByColumns". So the first find the first row with a value and the
    ' second the first column with a value. If the data is arranged in a neat rectangle, the
    ' last row and the last column will be for the same cell. But if the data is not a neat
    ' rectangle these statements will still the correct results.
    RowKeepLast = .Cells.Find(What:="*", After:=.Range("A1"), LookIn:=xlFormulas, _
                              SearchOrder:=xlByRows, SearchDirection:=xlPrevious).Row
    ColKeepLast = .Cells.Find(What:="*", After:=.Range("A1"), LookIn:=xlFormulas, _
                              SearchOrder:=xlByColumns, SearchDirection:=xlPrevious).Column

    Data = .Range(.Cells(1, 1), _
                  .Cells(RowKeepLast, ColKeepLast)).Value

  End With

  ' Output all the unique sets
  Debug.Print "Unique Sets"
  For RowUnqCrnt = 1 To UBound(UniqueSets, 1)
    Debug.Print UniqueSets(RowUnqCrnt, 1)
  Next

  ' Output the first 20 rows of the data

  ' This will ReDim Headings as an array with a lower bound of 0
  Headings = VBA.Array("SN", "Set", "FF", "VHCC", "VHCCMID", "VHCVMID", _
                       "VHCV1", "VHCV2", "VHCV3", "VHCV4", "VHCV5")

  ReDim ColWidths(1 To UBound(Data, 2))

  ' Caluclate maximum width of each column
  For ColKeepCrnt = 1 To UBound(Data, 2)
    ColWidths(ColKeepCrnt) = Len(Headings(ColKeepCrnt - 1))
  Next
  For RowKeepCrnt = 1 To 20         ' Replace 20 by Ubound(Data, 1) to include all rows
    For ColKeepCrnt = 1 To ColKeepLast
      If ColWidths(ColKeepCrnt) < Len(Data(RowKeepCrnt, ColKeepCrnt)) Then
        ColWidths(ColKeepCrnt) = Len(Data(RowKeepCrnt, ColKeepCrnt))
      End If
    Next
  Next

  ' Output data
  Debug.Print "Data"
  Debug.Print "|";
  For ColKeepCrnt = 1 To ColKeepLast
    Debug.Print PadR(Headings(ColKeepCrnt - 1), ColWidths(ColKeepCrnt)) & "|";
  Next
  Debug.Print
  For RowKeepCrnt = 1 To 20
    Debug.Print "|";
    For ColKeepCrnt = 1 To ColKeepLast
      Debug.Print PadR(Data(RowKeepCrnt, ColKeepCrnt), ColWidths(ColKeepCrnt)) & "|";
    Next
    Debug.Print
  Next

End Sub
Function PadR(ByVal Str As String, ByVal PadLen As Long) As String

  If Len(Str) >= PadLen Then
    ' Do not truncate over length strings
    PadR = Str
  Else
    PadR = Left$(Str & Space(PadLen), PadLen)
  End If

End Function