mylists = [ [1,3,5,7], [2,4,7,8], [11,15], [20] ]
import time
def timeit(f):
def wrapper(*args, **kw):
t = time.time()
res = f(*args, **kw)
print "%s took %s" % (f.func_name, time.time() - t)
return res
return wrapper
# merge two given lists
def merge(l, r):
if len(l) == 0:
return r
if len(r) == 0:
return l
if l[0] <= r[0]:
return [l[0]] + merge(l[1:], r)
else:
return [r[0]] + merge(l, r[1:])
def merge2(x,y):
if len(x) == 0:
return y
if len(y) == 0:
return x
#pop the lower one between the two biggest items
last = y.pop() if x[-1] < y[-1] else x.pop()
merged = merge2(x,y)
merged.append(last)
return merged
def merge3(xs, ys):
ms = []
i = 0
j = 0
while i < len(xs) and j < len(ys):
if xs[i] <= ys[j]:
ms.append(xs[i])
i = i + 1
else:
ms.append(ys[j])
j = j + 1
while i < len(xs) and j == len(ys):
ms.append(xs[i])
i = i + 1
while i == len(xs) and j < len(ys):
ms.append(xs[i])
j = j + 1
return ms
# divide and conquer
def lmerge(lists, m):
if len(lists) <= 1:
return lists
mid = len(lists) / 2
llists = lmerge(lists[:mid], m)
rlists = lmerge(lists[mid:], m)
# the bottom merge will have a list of list
if isinstance(llists[0], list):
llists = llists[0]
if isinstance(rlists[0], list):
rlists = rlists[0]
return m(llists, rlists)
@timeit
def a():
print lmerge(mylists, merge)
@timeit
def b():
print lmerge(mylists, merge2)
@timeit
def c():
print lmerge(mylists, merge3)
@timeit
def d():
print sorted(reduce(lambda x,y: x + y, mylists))
a()
b()
c()
d()
antz@antz-90X3A:~/python/algo$ python addlists.py
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
a took 7.00950622559e-05
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
b took 6.103515625e-05
Traceback (most recent call last):
File "addlists.py", line 101, in <module>
c()
File "addlists.py", line 13, in wrapper
res = f(*args, **kw)
File "addlists.py", line 97, in c
print lmerge(mylists, merge3)
File "addlists.py", line 82, in lmerge
if isinstance(rlists[0], list):
IndexError: list index out of range
antz@antz-90X3A:~/python/algo$
有点混淆为什么merge3的提升和索引错误?
merge,merge2,merge3应该有相同的输出(至少在我的脑海中)所以我不明白为什么它引发了一个IndexError,因为它适用于merge和merge2
编辑:如果有合并算法,我还能有更好的合并算法吗?
答案 0 :(得分:1)
我认为merge3()
结尾有一个错误。
由于i == len(xs)
超出范围,ms.append(xs[i])
总是会因xs[i]
而失败。
while i == len(xs) and j < len(ys):
#ms.append(xs[i]) # Possibly wrong.
ms.append(ys[j]) # I think that is what you want instead.
....
您不会再获得索引超出范围错误,并且您的算法现在应该可以正常工作。
答案 1 :(得分:1)
这里有几个问题。调试是一件大事,特别是当它是你自己的代码时。我给你的第一个建议是开始在你的代码中放置print语句,看看问题出在哪里以及出了什么问题。如果您希望“mylists”之类的内容看起来总是一样,请查看它,看看它是否会发生变化。
在这里说的是正在发生的事情:
我的第一个建议是检查每次合并中“mylists”会发生什么。现在您认为问题出在merge3中。但是在你达到这一点之前,确实会有一些时髦的事情发生。
您正在merge2中使用merge2的递归调用。哪个很棒,因为递归很棒。但是当你到达任何一个列表的末尾会发生什么?列表传递给函数时会发生什么?如果merge2到达x或y的末尾,则返回x或y。但是什么是x或y?好吧,他们是你传入python的原始列表,随着时间的推移已被修改。在merge2中,您实际上正在更改传入的列表。但是,您可能会说,当我复制列表时会发生同样的事情!是的,让我们使用标准的temp_lists = list(mylist)复制列表,并检查传入的所有内容的ID。注释掉(),c()和d()并将b()更改为这个并检查出来:
@timeit
def b():
temp_lists = []
temp_lists = list(mylists) #making our copy here
print "in b"
print "id of temp_lists is: ", id(temp_lists)
print "id of mylists is: ", id(mylists)
print "id of temp_lists[0] is: ", id(temp_lists[0])
print "id of mylists[0] is: ", id(mylists[0])
print lmerge(temp_lists, merge2)
print "after b"
print "id of temp_lists[0] is: ", id(temp_lists[0])
print "id of mylists[0] is: ", id(mylists[0])
输出应如下所示:
$ python merge_play.py
in b
id of temp_lists is: 140392743662368
id of mylists is: 140392743662872
id of temp_lists[0] is: 140392743662512
id of mylists[0] is: 140392743662512
[1, 2, 3, 4, 5, 7, 7, 8, 11, 15, 20]
after b
id of temp_lists[0] is: 140392743662512
id of mylists[0] is: 140392743662512
b took 0.000180959701538
请注意,temp_list和mylist的id不同,但temp_list [0]和mylist [0]的id是相同的。这意味着当你从每个列表中弹出东西来合并它们时,你正在修改原始列表。有几种方法可以解决这个问题。查看copy.deepcopy(),或者根据此问题的上下文,编写自己的方法来递归复制列表。
您还有其他问题(例如,如果您在我的列表中传入一个空列表会发生什么?)但是,现在,您的问题在于如何处理传递给merge2的列表。
答案 2 :(得分:1)
除非我错过了什么,否则很容易做到:
mylists = [ [1,3,5,7], [2,4,7,8], [11,15], [20] ]
newlist = []
for elem in mylists:
newlist.extend(elem)
newlist = sorted(newlist)