合并排序与差距

时间:2013-10-01 13:49:33

标签: sorting python-3.x merge mergesort

假设我有以下排序列表:

a = ['8EF5CD1B', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222']
b = ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', '4EFA3222']
c = ['96276D30', 'B1392DB3', 'BD23F32A', '59770CD6']

我希望它们通过填充优先级较低的列表中的间隙进行合并排序。

>>> from itertools import permutations
>>> LISTS = (a, b, c)
>>> for (first, second) in permutations(LISTS, 2):
...     print((LISTS.index(first), LISTS.index(second)), magic(first, second))
...
(0, 1) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222']
(0, 2) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222']
(1, 0) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222']
(1, 2) ['8EF5CD1B', '96276D30', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222']
(2, 0) ['96276D30', '8EF5CD1B', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222']
(2, 1) ['8EF5CD1B', '96276D30', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222']
>>>magic(*LISTS)
['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222']

正如您在(0,1)中看到的那样96276D30到达第二位,因为b列表填补了空白。在配置订单的情况下,优先级转到第一个参数。魔术函数应该使用两个以上的参数,以及上面的例子。我制作了一个有效的代码,但对于这样一个看似简单的任务来说,它太丑了(而且可能太慢了)。

MAX_ITERATIONS = 1000
class UnjoinableListsError(Exception): pass

def magic(*lists, iterations=MAX_ITERATIONS):
    """
    Returns a joint sorted list of presorted lists (or tuples).

    First it checks for common items, then it defines a gap list to put
    non-commons in. Finally it mixes them all. If items of more presorted
    list (or tuple) competes for a gap place, they will sorted in order
    of their parents were in arguments.
    """
    def sort_two(first, second):
        commons = [item for item in first if item in second]
        gap_list = [[] for i in range(len(commons)+1)]
        for l in (first, second):
            gap_item = []
            sliced = []
            for common_item in commons:
                common_i = l.index(common_item)
                sliced.append((list(l[:common_i]), list(l[common_i+1:])))
            gap_item.append(sliced[0][0])
            for j in range(len(sliced) - 1):
                gap_item.append([item for item in sliced[j][1]
                                    if item in sliced[j+1][0]])
            gap_item.append(sliced[-1][1])
            for j, item in enumerate(gap_item):
                gap_list[j].extend([i for i in item if i not in commons])
        result = []
        result.extend(gap_list[0])
        for i in range(len(commons)):
            result.append(commons[i])
            result.extend(gap_list[i+1])
        return result

    result = lists[0]
    index_set = {i for i in range(1, len(lists))}
    it = iterations
    while index_set and it > 0:
        it -= 1
        if it == 0:
            raise UnjoinableListsError('The lists at argument index {}'+
                'are unjoinable.'.format(str(index_set)))
        i = index_set.pop()
        try:
            result = sort_two(result, lists[i])
        except:
            index_set.add(i)
    return result

我错过了一些清晰易懂的解决方案吗?谢谢你的回答。

1 个答案:

答案 0 :(得分:0)

好吧,没有回答让我这么做。以下是可以很好地运行的代码:

def joint_sorted(*sequences):
    """Sorts two or more presorted sequences. The priority is in
decreasing order for the case of unambiguous elem order.

>>> joint_sorted([1,3,4,5,6], [1,2,4,6,7], [6, 10, 11], [12, 11, 17])
[1, 3, 2, 4, 5, 6, 7, 10, 12, 11, 17]
>>> joint_sorted('adgth', 'dbgjhk')
'adbgtjhk'"""
    def for_two(first_seq, second_seq):
        first_set, second_set = set(first_seq), set(second_seq)
        if (len(first_seq) != len(first_set)
            or len(second_seq) != len(second_set)):
            raise TypeError("The sequences must contain "
                "unique elems only!")
        common_elems = first_set & second_set
        before, buf = {}, []
        for i, e in iter(enumerate(second_seq)):
            if e in common_elems:
                before[e], buf = buf, []
            else:
                buf.append(e)
        result = []
        for e in first_seq:
            if e in before:
                result.extend(before[e])
            result.append(e)
        result.extend(buf)
        if isinstance(first_seq, str):
            return ''.join(result)
        return first_seq.__class__(result)
    first_seq = sequences[0]
    for i in range(1, len(sequences)):
        first_seq = for_two(first_seq, sequences[i])
    return first_seq