我已经实现了一些排序算法,包括插入,选择,外壳,两种合并。我发现我的工具的性能与对Algorithms(4th)的描述不符。 例如,这是两种合并排序。排序包含100,000个元素的列表时,Merge1大约需要0.6s,Merge2大约需要50 + s。但是Merge2与Algorithms(4th)中的Merge2几乎相同,但我使用的是python。我无法弄清楚为什么Merge2这么慢以及如何改进它。有人可以帮我吗?谢谢!
class Merge1:
def merge(self, a, b):
i = 0; j = 0
res = []
while i < len(a) and j < len(b):
if a[i] < b[j]:
res.append(a[i])
i = i + 1
else:
res.append(b[j])
j = j + 1
res = res + a[i:] + b[j:]
return res
def sort(self, source):
if len(source) <= 1:
return source
half = len(source) // 2
left = self.sort(source[:half])
right = self.sort(source[half:])
retval = self.merge(left, right)
return retval
def is_sort(self, source):
length = len(source)
for i in range(0, length-1):
if source[i] > source[i+1]:
return False
return True
class Merge2:
def merge(self, source, lo, mid ,hi):
i = lo
j = mid + 1
aux = source[:]
k = lo
while k <= hi:
if i > mid:
source[k] = aux[j]
j = j + 1
elif j > hi:
source[k] = aux[i]
i = i + 1
elif aux[i] < aux[j]:
source[k] = aux[i]
i = i + 1
else:
source[k] = aux[j]
j = j + 1
k = k+1
def sort(self, source):
sz = 1
N = len(source)
while sz < N:
for lo in range(0, N-sz, sz+sz):
# pdb.set_trace()
self.merge(source, lo, lo+sz-1, min(lo+sz+sz-1, N-1))
sz = sz + sz
def is_sort(self, source):
length = len(source)
for i in range(0, length-1):
if source[i] > source[i+1]:
return False
return True
这是测试代码:
merge1 = Merge1()
source = np.random.randint(100000, size=100000).tolist()
start = time.time()
merge1.sort(source)
end = time.time()
print("Merge1 takes: {}s".format(end-start))
merge2 = Merge2()
source = np.random.randint(100000, size=100000).tolist()
start = time.time()
merge2.sort(source)
end = time.time()
print("Merge2 takes: {}s".format(end-start))
结果: E:> python sort.py Merge1需要:0.6376256942749023s Merge2需要:57.99568271636963s
答案 0 :(得分:1)
考虑此修改。根据我的快速测试,它显着提高了性能(从近一分钟缩短到不到1秒)。主要的性能提升来自避免创建整个列表的多个副本。其他更改只会稍微提高性能。 根据总和的简单比较,它不会弄乱列表,但是如果您想使用它,则应该做更多测试。
class Merge4:
def merge(self, source, aux, lo, mid ,hi):
i = lo
j = mid + 1
a_j= aux[j]
a_i= aux[i]
k = lo
while k <= hi:
if i > mid:
source[k] = a_j
j += 1
a_j= aux[j]
elif j > hi:
source[k] = a_i
i += 1
a_i= aux[i]
elif a_i < a_j:
source[k] = a_i
i += 1
a_i= aux[i]
else:
source[k] = a_j
j += 1
a_j= aux[j]
k += 1
# update the aux array for the next call
aux[lo:hi+1]= source[lo:hi+1]
def sort(self, source):
sz = 1
N = len(source)
while sz < N:
sz_2= sz * 2
# create the aux array, that will be maintained continuously
# and add one extra None, so the "prefetching" works also
# during the last iteration (refering to a_i and a_j)
aux= source[:]
aux.append(None)
for lo in range(0, N-sz, sz_2):
# pdb.set_trace()
self.merge(source, aux, lo, lo+sz-1, min(lo+sz_2-1, N-1))
sz = sz_2
def is_sort(self, source):
length = len(source)
for i in range(0, length-1):
if source[i] > source[i+1]:
return False
return True