Question

mylists = [ [1,3,5,7], [2,4,7,8], [11,15], [20] ]
import time
def timeit(f):
    def wrapper(*args, **kw):
        t = time.time()
        res = f(*args, **kw)
        print "%s took %s" % (f.func_name, time.time() - t)
        return res

    return wrapper

# merge two given lists
def merge(l, r):
    if len(l) == 0:
        return r

    if len(r) == 0:
        return l

    if l[0] <= r[0]:
        return [l[0]] + merge(l[1:], r)
    else:
        return [r[0]] + merge(l, r[1:])

def merge2(x,y):
    if len(x) == 0:
        return y
    if len(y) == 0:
        return x

    #pop the lower one between the two biggest items
    last = y.pop() if x[-1] < y[-1] else x.pop()

    merged = merge2(x,y)
    merged.append(last)

    return merged

def merge3(xs, ys):
    ms = []
    i = 0
    j = 0

    while i < len(xs) and j < len(ys):
        if xs[i] <= ys[j]:
            ms.append(xs[i])
            i = i + 1
        else:
            ms.append(ys[j])
            j = j + 1 
    while i < len(xs) and j == len(ys):
        ms.append(xs[i])
        i = i + 1

    while i == len(xs) and j < len(ys):
        ms.append(xs[i])
        j = j + 1

    return ms

# divide and conquer
def lmerge(lists, m):
    if len(lists) <= 1:
        return lists

    mid = len(lists) / 2

    llists = lmerge(lists[:mid], m)
    rlists = lmerge(lists[mid:], m)

    # the bottom merge will have a list of list
    if isinstance(llists[0], list):
        llists = llists[0]
    if isinstance(rlists[0], list):
        rlists = rlists[0]

    return m(llists, rlists)

@timeit
def a():
    print lmerge(mylists, merge)

@timeit
def b():
    print lmerge(mylists, merge2)

@timeit
def c():
    print lmerge(mylists, merge3)

@timeit
def d():
    print sorted(reduce(lambda x,y: x + y, mylists))


a()
b()
c()
d()

antz@antz-90X3A:~/python/algo$ python addlists.py 
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
a took 7.00950622559e-05
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
b took 6.103515625e-05
Traceback (most recent call last):
  File "addlists.py", line 101, in <module>
    c()
  File "addlists.py", line 13, in wrapper
    res = f(*args, **kw)
  File "addlists.py", line 97, in c
    print lmerge(mylists, merge3)
  File "addlists.py", line 82, in lmerge
    if isinstance(rlists[0], list):
IndexError: list index out of range
antz@antz-90X3A:~/python/algo$

有点混淆为什么merge3的提升和索引错误？

merge，merge2，merge3应该有相同的输出（至少在我的脑海中）所以我不明白为什么它引发了一个IndexError，因为它适用于merge和merge2

编辑：如果有合并算法，我还能有更好的合并算法吗？

Answer 1

我认为merge3()结尾有一个错误。由于i == len(xs)超出范围，ms.append(xs[i])总是会因xs[i]而失败。

while i == len(xs) and j < len(ys):
    #ms.append(xs[i]) # Possibly wrong.
    ms.append(ys[j]) # I think that is what you want instead.
    ....

您不会再获得索引超出范围错误，并且您的算法现在应该可以正常工作。

Answer 2

这里有几个问题。调试是一件大事，特别是当它是你自己的代码时。我给你的第一个建议是开始在你的代码中放置print语句，看看问题出在哪里以及出了什么问题。如果您希望“mylists”之类的内容看起来总是一样，请查看它，看看它是否会发生变化。

在这里说的是正在发生的事情：

我的第一个建议是检查每次合并中“mylists”会发生什么。现在您认为问题出在merge3中。但是在你达到这一点之前，确实会有一些时髦的事情发生。

您正在merge2中使用merge2的递归调用。哪个很棒，因为递归很棒。但是当你到达任何一个列表的末尾会发生什么？列表传递给函数时会发生什么？如果merge2到达x或y的末尾，则返回x或y。但是什么是x或y？好吧，他们是你传入python的原始列表，随着时间的推移已被修改。在merge2中，您实际上正在更改传入的列表。但是，您可能会说，当我复制列表时会发生同样的事情！是的，让我们使用标准的temp_lists = list（mylist）复制列表，并检查传入的所有内容的ID。注释掉（），c（）和d（）并将b（）更改为这个并检查出来：

@timeit
def b():
    temp_lists = []
    temp_lists = list(mylists)  #making our copy here
    print "in b"
    print "id of temp_lists is: ", id(temp_lists)
    print "id of mylists is: ", id(mylists)
    print "id of temp_lists[0] is: ", id(temp_lists[0])
    print "id of mylists[0] is: ", id(mylists[0])
    print lmerge(temp_lists, merge2)
    print "after b"
    print "id of temp_lists[0] is: ", id(temp_lists[0])
    print "id of mylists[0] is: ", id(mylists[0])

输出应如下所示：

$ python merge_play.py 
in b
id of temp_lists is:  140392743662368
id of mylists is:  140392743662872
id of temp_lists[0] is:  140392743662512
id of mylists[0] is:  140392743662512
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
after b
id of temp_lists[0] is:  140392743662512
id of mylists[0] is:  140392743662512
b took 0.000180959701538

请注意，temp_list和mylist的id不同，但temp_list [0]和mylist [0]的id是相同的。这意味着当你从每个列表中弹出东西来合并它们时，你正在修改原始列表。有几种方法可以解决这个问题。查看copy.deepcopy（），或者根据此问题的上下文，编写自己的方法来递归复制列表。

您还有其他问题（例如，如果您在我的列表中传入一个空列表会发生什么？）但是，现在，您的问题在于如何处理传递给merge2的列表。

Answer 3

除非我错过了什么，否则很容易做到：

mylists = [ [1,3,5,7], [2,4,7,8], [11,15], [20] ]
newlist = []
for elem in mylists:
    newlist.extend(elem)
newlist = sorted(newlist)

Python合并列表

3 个答案: