Excel VBA集合合并排序

时间:2015-08-09 21:59:03

标签: excel excel-vba sorting collections mergesort vba

我正在尝试直接在Collection上实现MergeSort。这是从用于C ++的伪代码移植而来的。但是,MergeSort方法不返回任何数据。我的测试用例是使用{1,2,2,3,3,4}的输入集合,并返回一个Count = 0的Collection。问题是removeDupl = True和removeDupl = False。代码下面是一些调试日志的结果,这些日志似乎显示了mergesort在列表的3个成员中部分执行。为什么方法没有返回值?

Private Function mergeSort(col As Collection, Optional removeDupl = True) As Collection
'
'Execute a Merge sort
'removeDupl = True yields a sorted collection with unique values
'removeDupl = False yields a sorted collection with non-unique values
'

If col.Count = 1 Then

    Set mergeSort = col

Else
    Dim tempCol1 As Collection
    Dim tempCol2 As Collection
    Set tempCol1 = New Collection
    Set tempCol2 = New Collection

    For i = 1 To col.Count / 2

        tempCol1.Add col.Item(i)
        tempCol2.Add col.Item(i + (col.Count / 2))

    Next i

    Set tempCol1 = mergeSort(tempCol1)
    Set tempCol2 = mergeSort(tempCol2)

    Set mergeSort = merge(tempCol1, tempCol2, removeDupl)
End If
End Function

Private Function merge(col1 As Collection, col2 As Collection, ByVal removeDupl As Boolean) As Collection

If removeDupl = True Then
    On Error Resume Next
End If

Dim tempCol As Collection
Set tempCol = New Collection
Do While col1.Count <> 0 And col2.Count <> 0

    If col1.Item(1) > col2.Item(1) Then

        If removeDupl = True Then
            tempCol.Add col2.Item(1), col2.Item(1)
        Else
            tempCol.Add col2.Item(1)
        End If
        col2.Remove (1)

    Else

        If removeDupl = True Then
            tempCol.Add col1.Item(1), col1.Item(1)
        Else
            tempCol.Add col1.Item(1)
        End If
        col1.Remove (1)

    End If

  Loop


  Do While col1.Count <> 0

    If removeDupl = True Then
        tempCol.Add col1.Item(1), col1.Item(1)
    Else
        tempCol.Add col1.Item(1)
    End If
    col1.Remove (1)

  Loop

  Do While col2.Count <> 0

    If removeDupl = True Then
        tempCol.Add col2.Item(1), col2.Item(1)
    Else
        tempCol.Add col2.Item(1)
    End If
    col2.Remove (1)

  Loop

On Error GoTo 0
Set merge = tempCol
End Function
mergeSort Called

--col.Count = 6
----col.Item(1 + col.Count / 2) = 2
----col.Item(1) = 1
----col.Item(2 + col.Count / 2) = 3
----col.Item(2) = 2
----col.Item(3 + col.Count / 2) = 4
----col.Item(3) = 3

mergeSort Called

--col.Count = 3
----col.Item(1 + col.Count / 2) = 2
----col.Item(1) = 1

mergeSort Called

--col.Count = 1

mergeSort Called

--col.Count = 1

merge called

--col1.Count = 1
--col2.Count = 1

1 compared to 2

----1 Added
----2 Added

mergeSort Called

--col.Count = 3
----col.Item(1 + col.Count / 2) = 3
----col.Item(1) = 2

mergeSort Called

--col.Count = 1

mergeSort Called

--col.Count = 1

merge called

--col1.Count = 1
--col2.Count = 1

2 compared to 3

----2 Added
----3 Added

merge called

--col1.Count = 0
--col2.Count = 0

2 个答案:

答案 0 :(得分:1)

@xidgel是正确的:它适用于字符串。 “On Error Resume Next”语句隐藏了2个错误:

  • 错误457:此密钥已与此集合的元素相关联(预期)

  • 错误:13:类型不匹配

使用数字将它们转换为字符串(将空字符串附加到它们(“”))

Option Explicit

Private Function mergeSort(c As Collection, Optional uniq = True) As Collection

    Dim i As Long, xMax As Long, tmp1 As Collection, tmp2 As Collection, xOdd As Boolean

    Set tmp1 = New Collection
    Set tmp2 = New Collection

    If c.Count = 1 Then
        Set mergeSort = c
    Else

        xMax = c.Count
        xOdd = (c.Count Mod 2 = 0)
        xMax = (xMax / 2) + 0.1     ' 3 \ 2 = 1; 3 / 2 = 2; 0.1 to round up 2.5 to 3

        For i = 1 To xMax
            tmp1.Add c.Item(i) & "" 'force numbers to string
            If (i < xMax) Or (i = xMax And xOdd) Then tmp2.Add c.Item(i + xMax) & ""
        Next i

        Set tmp1 = mergeSort(tmp1, uniq)
        Set tmp2 = mergeSort(tmp2, uniq)

        Set mergeSort = merge(tmp1, tmp2, uniq)

    End If
End Function
Private Function merge(c1 As Collection, c2 As Collection, _
                       Optional ByVal uniq As Boolean = True) As Collection

    Dim tmp As Collection
    Set tmp = New Collection

    If uniq = True Then On Error Resume Next    'hide duplicate errors

    Do While c1.Count <> 0 And c2.Count <> 0
        If c1.Item(1) > c2.Item(1) Then
            If uniq Then tmp.Add c2.Item(1), c2.Item(1) Else tmp.Add c2.Item(1)
            c2.Remove 1
        Else
            If uniq Then tmp.Add c1.Item(1), c1.Item(1) Else tmp.Add c1.Item(1)
            c1.Remove 1
        End If
    Loop

    Do While c1.Count <> 0
        If uniq Then tmp.Add c1.Item(1), c1.Item(1) Else tmp.Add c1.Item(1)
        c1.Remove 1
    Loop
    Do While c2.Count <> 0
        If uniq Then tmp.Add c2.Item(1), c2.Item(1) Else tmp.Add c2.Item(1)
        c2.Remove 1
    Loop
    On Error GoTo 0

    Set merge = tmp

End Function

试验:

Public Sub testInts()
    Dim tmp As Collection: Set tmp = New Collection

    tmp.Add 3: tmp.Add 1: tmp.Add 4
    'if next line (2) is commented out:     if dupes: "1,3,4,4"  if uniques: "1,3,4"
    tmp.Add 2                    'else:     if dupes: "1,2,3,4,4 if uniques: "1,2,3,4"
    tmp.Add 4
    Set tmp = mergeSort(tmp, False)
End Sub

Public Sub testStrings()
    Dim tmp As Collection: Set tmp = New Collection

    tmp.Add "C": tmp.Add "A": tmp.Add "D"
    'if next line ("B") is commented out:   if dupes: "A,C,D,D"  if uniques: "A,C,D"
    'tmp.Add "B"         'else:             if dupes: "A,B,C,D,D" if uniques: "A,B,C,D"
    tmp.Add "D"
    Set tmp = mergeSort(tmp, False)
End Sub

'------------------------------------------------------------------------------------------

答案 1 :(得分:0)

在2011年写了a blog article on this exact subject ...我的代码可以免费使用。我的代码的一个特别有用的功能是:它可以用于通过命名属性对对象集合进行排序。

Attribute VB_Name = "Collections"
Option Compare Database
Option Explicit
' Note that STRING INDEXED ARRAYS are called "Dictionary".  Available from Windows Scripting Runtime.
' SORTING ARRAYS OF User Defined Types: http://www.dailydoseofexcel.com/archives/2006/02/23/sorting-arrays-of-user-defined-types/
' For HeapSort: http://www.source-code.biz/snippets/vbasic/6.htm

'***********************************************************************************************
'THE MERGESORT ALGORITHM FOR SORTING IN O(n.log(n)) TIME - Applied to VBA COLLECTION objects...
'***********************************************************************************************
' © 2005-2011 Matthew Slyman. Copying, modification and distribution in software is permitted.
' Attribution of work to author is required, and unauthorised redistribution is not permitted.
' Copyright notice must remain intact.
Public Function MergeSortCollection(ByRef CollectionToSort As Collection, Optional ByVal OrderByProperty As String, Optional ByVal OrderByType As String, Optional ByVal InDescendingOrder As Boolean = False, Optional DISTINCT As Boolean = False) As Collection ' Optional CompareMode As VbCompareMethod = vbTextCompare ' - potentially useful for Strings ''' Optional identify_by_obj_guid As Boolean = True ' - alternative being to identify by Parameters.  See below under "=Potentially fixable weaknesses of this routine:==="
' >>> What about ORDERing by Array or Collection of properties?
On Error GoTo Failed
    If CollectionToSort.Count > 1 Then
        If LenB(OrderByType) = 0 Then ' If sorting by a Variant, the OrderByType parameter enables the programmer to specify how to sort it (numerical or string based sorting).  Otherwise, the VBA code below can automatically detect the data type of the sorting/comparison variable.
            Dim testVar As Variant ' <<< Should perhaps be using the IsObject function...  Investigate whether this would result in a more reliable SortByMerge function.  Think about the potential use of default Value.
            If LenB(OrderByProperty) = 0 Then
                testVar = CollectionToSort(1)
            Else
                testVar = CollectionToSort(1).Properties(OrderByProperty)
            End If
            OrderByType = TypeName(testVar)
        End If
        ' >>> Need to think about USER-DEFINED TYPES!  And how to use Properties() in them!  Remember that user-defined types are NOT Objects...  << Actually, Collection objects themselves do not appear to handle UDTs (user-defined types) gracefully at all - so it is very unlikely that someone would be using this routine on a Collection of UDT-variables at all.
        Select Case OrderByType ' VarType function results: vbNull; vbInteger; vbLong; vbSingle; vbDouble; vbCurrency; vbDate; vbString; vbObject; vbError; vbBoolean; vbVariant; vbDataObject; vbDecimal; vbByte; vbUserDefinedType; vbArray
            Case "Single", "Double", "String", "Integer", "Long", "Byte", "Currency", "Decimal", "Date": ' Boolean?
            Case Else:
                Err.Raise number:=vbObjectError + 1, Source:="AAA.Collections.MergeSortCollection", Description:="OrderBy Type not recognized.  Use Single, Double, String, Integer, Long, Byte, Currency, Decimal or Date"
        End Select
        ' <<< Might push the stuff above this line into a separate initialization function, for efficiency reasons.
    End If
    Dim SortedCollection As New Collection
    Select Case CollectionToSort.Count
        Case 0, 1:
            Set MergeSortCollection = CollectionToSort
        Case Else:
            Dim Size1 As Long, Size2 As Long, CollectionToSortSize As Long, counter As Long
            Dim Collection1 As New Collection, Collection2 As New Collection
            CollectionToSortSize = CollectionToSort.Count
            Size1 = Round(CollectionToSortSize / 2, 0)
            Size2 = CollectionToSortSize - Size1
            For counter = 1 To CollectionToSort.Count
                If counter <= Size1 Then Collection1.Add CollectionToSort(counter) Else Collection2.Add CollectionToSort(counter)
            Next counter
            Set MergeSortCollection = MergeInOrder(MergeSortCollection(Collection1, OrderByProperty, OrderByType, InDescendingOrder, DISTINCT), MergeSortCollection(Collection2, OrderByProperty, OrderByType, InDescendingOrder, DISTINCT), OrderByProperty, OrderByType, InDescendingOrder, DISTINCT)
    End Select
    Exit Function
Failed:
    Debug.Print "#ERROR# " & Err.number & " : " & Err.Source & ".Collections.MergeSortCollection " & vbCrLf & " - " & Err.Description
    Err.Clear
End Function
Private Function MergeInOrder(ByRef Collection1 As Collection, ByRef Collection2 As Collection, Optional OrderByProperty As String = "", Optional OrderByType As String = "String", Optional ByVal InDescendingOrder As Boolean = False, Optional DISTINCT As Boolean = False) As Collection
' The other half of the MERGESORT algorithm, for COLLECTIONS...  An auxiliary function for the recursive MergeSort function...  The first function splits the Collections successively into halves, and then this function merges the halves in order, successively, until the resulting sorted Collection is returned.
' >> NEED to use . dot delimited Properties for multiple levels of objects...  Could also replace with Collection.  Automatically determine the types of those properties.  Sort accordingly.
' >>> Yet to rigorously test sorting stability (to see whether function preserves original ordering as far as possible).&nbsp; Appears to do so...  Just want to make sure...
' >>> Yet to rigorously test worst-case space complexity.  Appears to be O(n) but just want to make sure it is in practice...
    Dim SortedCollection As New Collection
    Dim Counter1 As Long, Counter2 As Long
    Counter1 = 1
    Counter2 = 1
    Dim ComparisonFlag As Boolean
    Do While Counter1 <= Collection1.Count And Counter2 <= Collection2.Count
        Dim ComparisonVariable1 As Variant, ComparisonVariable2 As Variant
        If DISTINCT Then
            Dim IdenticalNodes As Boolean ' Not necessary to compare e.g. Collection1(1) with Collection1(2) because Collection1 itself will already have been split and merged, and recursively tested for identical elements via this MergeInOrder function.
            If Not (LenB(OrderByProperty) <> 0) Then ' <<< Should perhaps be using the IsObject function...  Investigate whether this would result in a more reliable SortByMerge function.
                IdenticalNodes = (Collection1(Counter1) = Collection2(Counter2))
            Else
                IdenticalNodes = (Collection1(Counter1) Is Collection2(Counter2))
            End If
            If IdenticalNodes Then
                SortedCollection.Add Collection1(Counter1)
                Counter1 = Counter1 + 1 ' Already inserted into SortedCollection.
                Counter2 = Counter2 + 1 ' Pass over the duplicate.
                GoTo SkipComparison
            End If
        End If
        If Not (LenB(OrderByProperty) <> 0) Then
            ComparisonVariable1 = Collection1(Counter1)
            ComparisonVariable2 = Collection2(Counter2)
        Else
            ComparisonVariable1 = Collection1(Counter1).Properties(OrderByProperty)
            ComparisonVariable2 = Collection2(Counter2).Properties(OrderByProperty)
        End If
        Select Case OrderByType ' Using a text-based parameter, rather than automatically detecting type,
            Case "Boolean": ComparisonFlag = CBool(ComparisonVariable1) < CBool(ComparisonVariable2) ' << WARNING: Numeric representation of "True" constant depends on system implementation.  e.g. VBA (INT -1) differs from SQL Server (BIT 1) in this respect.  Is TRUE<FALSE or is FALSE>TRUE?
            Case "Single": ComparisonFlag = CSng(ComparisonVariable1) < CSng(ComparisonVariable2)
            Case "Double": ComparisonFlag = CDbl(ComparisonVariable1) < CDbl(ComparisonVariable2)
            Case "String": ComparisonFlag = (-1 = Strings.StrComp(CStr(ComparisonVariable1), CStr(ComparisonVariable2), vbTextCompare))
            Case "Integer", "Long", "Byte": ComparisonFlag = CLng(ComparisonVariable1) < CLng(ComparisonVariable2)
            Case "Currency": ComparisonFlag = CCur(ComparisonVariable1) < CCur(ComparisonVariable2) ' What about comparison of dissimilar currencies in heterogeneous forex environment?
            Case "Decimal": ComparisonFlag = CDec(ComparisonVariable1) < CDec(ComparisonVariable2)
            Case "Date": ComparisonFlag = CDate(ComparisonVariable1) < CDate(ComparisonVariable2)
        End Select
        If InDescendingOrder Then ComparisonFlag = Not ComparisonFlag
        If ComparisonFlag Then
            SortedCollection.Add Collection1(Counter1)
            Counter1 = Counter1 + 1
        Else
            SortedCollection.Add Collection2(Counter2)
            Counter2 = Counter2 + 1
        End If
SkipComparison:
    Loop
    Do While Counter1 <= Collection1.Count
        SortedCollection.Add Collection1(Counter1)
        Counter1 = Counter1 + 1
    Loop
    Do While Counter2 <= Collection2.Count
        SortedCollection.Add Collection2(Counter2)
        Counter2 = Counter2 + 1
    Loop
    Set Collection1 = Nothing
    Set Collection2 = Nothing
    Set MergeInOrder = SortedCollection
'    Set SortedCollection = Nothing ' Would this not muck up the results of the function?  Remember, MergeInOrder is still set by Object Ref to SortedCollection.  They are essentially the same object...
End Function
' END OF MERGESORT FOR COLLECTIONS