Question

我已经实现了一些排序算法，包括插入，选择，外壳，两种合并。我发现我的工具的性能与对Algorithms（4th）的描述不符。例如，这是两种合并排序。排序包含100,000个元素的列表时，Merge1大约需要0.6s，Merge2大约需要50 + s。但是Merge2与Algorithms（4th）中的Merge2几乎相同，但我使用的是python。我无法弄清楚为什么Merge2这么慢以及如何改进它。有人可以帮我吗？谢谢！

class Merge1:
    def merge(self, a, b):
        i = 0; j = 0
        res = []
        while i < len(a) and j < len(b):
            if a[i] < b[j]:
                res.append(a[i])
                i = i + 1
            else:
                res.append(b[j])
                j = j + 1
        res = res + a[i:] +  b[j:]
        return res

    def sort(self, source):
        if len(source) <= 1:
            return source
        half = len(source) // 2
        left = self.sort(source[:half])
        right = self.sort(source[half:])
        retval = self.merge(left, right)
        return retval

    def is_sort(self, source):
        length = len(source)
        for i in range(0, length-1):
            if source[i] > source[i+1]:
                return False
        return True

class Merge2:
    def merge(self, source, lo, mid ,hi):
        i = lo
        j = mid + 1
        aux = source[:]
        k = lo
        while k <= hi:
            if i > mid:
                source[k] = aux[j]
                j = j + 1
            elif j > hi:
                source[k] = aux[i]
                i = i + 1
            elif aux[i] < aux[j]:
                source[k] = aux[i]
                i = i + 1
            else:
                source[k] = aux[j]
                j = j + 1
            k = k+1

    def sort(self, source):
        sz = 1
        N = len(source)
        while sz < N:
            for lo in range(0, N-sz, sz+sz):
                # pdb.set_trace()
                self.merge(source, lo, lo+sz-1, min(lo+sz+sz-1, N-1))
            sz = sz + sz

    def is_sort(self, source):
        length = len(source)
        for i in range(0, length-1):
            if source[i] > source[i+1]:
                return False
        return True

以下是算法中的工具：

这是测试代码：

    merge1 = Merge1()
    source = np.random.randint(100000, size=100000).tolist()
    start = time.time()
    merge1.sort(source)
    end = time.time()
    print("Merge1 takes: {}s".format(end-start))


    merge2 = Merge2()
    source = np.random.randint(100000, size=100000).tolist()
    start = time.time()
    merge2.sort(source)
    end = time.time()
    print("Merge2 takes: {}s".format(end-start))

结果： E：> python sort.py Merge1需要：0.6376256942749023s Merge2需要：57.99568271636963s

Answer 1

考虑此修改。根据我的快速测试，它显着提高了性能（从近一分钟缩短到不到1秒）。主要的性能提升来自避免创建整个列表的多个副本。其他更改只会稍微提高性能。根据总和的简单比较，它不会弄乱列表，但是如果您想使用它，则应该做更多测试。

class Merge4:
    def merge(self, source, aux, lo, mid ,hi):
        i = lo
        j = mid + 1
        a_j= aux[j]
        a_i= aux[i]
        k = lo
        while k <= hi:
            if i > mid:
                source[k] = a_j
                j += 1
                a_j= aux[j]
            elif j > hi:
                source[k] = a_i
                i += 1
                a_i= aux[i]
            elif a_i < a_j:
                source[k] = a_i
                i += 1
                a_i= aux[i]
            else:
                source[k] = a_j
                j += 1
                a_j= aux[j]
            k += 1
        # update the aux array for the next call
        aux[lo:hi+1]= source[lo:hi+1]

    def sort(self, source):
        sz = 1
        N = len(source)
        while sz < N:
            sz_2= sz * 2
            # create the aux array, that will be maintained continuously
            # and add one extra None, so the "prefetching" works also
            # during the last iteration (refering to a_i and a_j)
            aux= source[:]
            aux.append(None)
            for lo in range(0, N-sz, sz_2):
                # pdb.set_trace()
                self.merge(source, aux, lo, lo+sz-1, min(lo+sz_2-1, N-1))
            sz = sz_2

    def is_sort(self, source):
        length = len(source)
        for i in range(0, length-1):
            if source[i] > source[i+1]:
                return False
        return True

实施的排序算法性能不佳

1 个答案: