Question

[http://jsperf.com/optimized-mergesort-versus-quicksort][1]
为什么这个半缓冲区合并排序的工作和quicksort一样快？

QuickSort是：

就地，虽然它占用n/2递归（堆栈空间）
高速缓存亲和

这半缓冲区合并排序：

使用log(n)缓冲区进行合并。
使用function partition(a, i, j) { var p = i + Math.floor((j - i) / 2); var left = i + 1; var right = j; swap(a, i, p); var pivot = a[i]; while (left <= right) { while (builtinLessThan(a[left], pivot)) { ++left; } while (builtinLessThan(pivot, a[right])) { --right; } if (left <= right) { swap(a, left, right); ++left; --right; } } swap(a, i, right); return right; }; function quickSort(a, i, j) { var p = partition(a, i, j); if ((p) - i > j - p) { if (i < p - 1) { quickSort(a, i, p - 1); } if (p + 1 < j) { quickSort(a, p + 1, j); } } else { if (p + 1 < j) { quickSort(a, p + 1, j); } if (i < p - 1) { quickSort(a, i, p - 1); } } };递归。
进行较少的比较。

我的问题是，在这种情况下，为什么半缓冲区合并排序与QuickSort的速度相匹配？另外，我对quickSort做错了什么让它变慢了？

IsMouseOver

Answer 1

合并排序比较少，但比快速排序更多动作。必须调用函数来进行比较会增加比较的开销，这会使快速排序变慢。示例快速排序中的所有if语句也会降低它的速度。如果比较和交换是内联完成的，那么如果对伪随机整数数组进行排序，快速排序应该会快一些。

如果在具有16个寄存器的处理器上运行，例如64位模式的PC，则使用最终在寄存器中的一串指针进行4路合并排序与快速排序一样快。对于每个移动的元素，2路合并排序平均为1比较，而对于移动的每个元素，4路合并排序平均为3比较，但是仅传递1/2的传递次数，因此基本操作的数量相同，但是比较缓存更友好，使4路合并排序快15％左右，与快速排序大致相同。

我不熟悉java脚本，所以我将这些示例转换为C ++。

使用转换后的java脚本合并排序版本，对1.6百万个伪随机32位整数进行排序大约需要2.4秒。下面显示的示例快速排序大约需要1.4秒，下面显示的示例自下而上合并排序大约1.6秒。如上所述，使用16个寄存器的处理器上的一串指针（或索引）进行4路合并也需要大约1.4秒。

C ++快速排序示例：

void QuickSort(int a[], int lo, int hi) {
    int i = lo, j = hi;
    int pivot = a[(lo + hi) / 2];
    int t;
    while (i <= j) {            // partition
        while (a[i] < pivot)
            i++;
        while (a[j] > pivot)
            j--;
        if (i <= j) {
            t = a[i]
            a[i] = a[j];
            a[j] = t;
            i++;
            j--;
        }
    }
    if (lo < j)                 // recurse
        QuickSort(a, lo, j);
    if (i < hi)
        QuickSort(a, i, hi);
}

C ++自下而上合并排序示例：

void BottomUpMergeSort(int a[], int b[], size_t n)
{
size_t s = 1;                               // run size 
    if(GetPassCount(n) & 1){                // if odd number of passes
        for(s = 1; s < n; s += 2)           // swap in place for 1st pass
            if(a[s] < a[s-1])
                std::swap(a[s], a[s-1]);
        s = 2;
    }
    while(s < n){                           // while not done
        size_t ee = 0;                      // reset end index
        while(ee < n){                      // merge pairs of runs
            size_t ll = ee;                 // ll = start of left  run
            size_t rr = ll+s;               // rr = start of right run
            if(rr >= n){                    // if only left run
                rr = n;
                BottomUpCopy(a, b, ll, rr); //   copy left run
                break;                      //   end of pass
            }
            ee = rr+s;                      // ee = end of right run
            if(ee > n)
                ee = n;
            BottomUpMerge(a, b, ll, rr, ee);
        }
        std::swap(a, b);                    // swap a and b
        s <<= 1;                            // double the run size
    }
}

void BottomUpMerge(int a[], int b[], size_t ll, size_t rr, size_t ee)
{
    size_t o = ll;                          // b[]       index
    size_t l = ll;                          // a[] left  index
    size_t r = rr;                          // a[] right index
    while(1){                               // merge data
        if(a[l] <= a[r]){                   // if a[l] <= a[r]
            b[o++] = a[l++];                //   copy a[l]
            if(l < rr)                      //   if not end of left run
                continue;                   //     continue (back to while)
            while(r < ee)                   //   else copy rest of right run
                b[o++] = a[r++];
            break;                          //     and return
        } else {                            // else a[l] > a[r]
            b[o++] = a[r++];                //   copy a[r]
            if(r < ee)                      //   if not end of right run
                continue;                   //     continue (back to while)
            while(l < rr)                   //   else copy rest of left run
                b[o++] = a[l++];
            break;                          //     and return
        }
    }
}

void BottomUpCopy(int a[], int b[], size_t ll, size_t rr)
{
    while(ll < rr){                         // copy left run
        b[ll] = a[ll];
        ll++;
    }
}

size_t GetPassCount(size_t n)               // return # passes
{
    size_t i = 0;
    for(size_t s = 1; s < n; s <<= 1)
        i += 1;
    return(i);
}

使用指针进行4路合并排序的C ++示例（goto＆＃39; s用于保存代码空间，它是旧代码）。它开始进行4路合并，然后当到达运行结束时，它切换到3路合并，然后2路合并，然后是剩余运行剩下的剩余部分的副本。这与用于外部排序的算法类似，但外部排序逻辑更通用，并且通常最多可处理16路合并。

int * BottomUpMergeSort(int a[], int b[], size_t n)
{
int *p0r;       // ptr to      run 0
int *p0e;       // ptr to end  run 0
int *p1r;       // ptr to      run 1
int *p1e;       // ptr to end  run 1
int *p2r;       // ptr to      run 2
int *p2e;       // ptr to end  run 2
int *p3r;       // ptr to      run 3
int *p3e;       // ptr to end  run 3
int *pax;       // ptr to set of runs in a
int *pbx;       // ptr for merged output to b
size_t rsz = 1; // run size
    if(n < 2)
        return a;
    if(n == 2){
        if(a[0] > a[1])std::swap(a[0],a[1]);
        return a;
    }
    if(n == 3){
        if(a[0] > a[2])std::swap(a[0],a[2]);
        if(a[0] > a[1])std::swap(a[0],a[1]);
        if(a[1] > a[2])std::swap(a[1],a[2]);
        return a;
    }
    while(rsz < n){
        pbx = &b[0];
        pax = &a[0];
        while(pax < &a[n]){
            p0e = rsz + (p0r = pax);
            if(p0e >= &a[n]){
                p0e = &a[n];
                goto cpy10;}
            p1e = rsz + (p1r = p0e);
            if(p1e >= &a[n]){
                p1e = &a[n];
                goto mrg201;}
            p2e = rsz + (p2r = p1e);
            if(p2e >= &a[n]){
                p2e = &a[n];
                goto mrg3012;}
            p3e = rsz + (p3r = p2e);
            if(p3e >= &a[n])
                p3e = &a[n];
            // 4 way merge
            while(1){
                if(*p0r <= *p1r){
                    if(*p2r <= *p3r){
                        if(*p0r <= *p2r){
mrg40:                      *pbx++ = *p0r++;    // run 0 smallest
                            if(p0r < p0e)       // if not end run continue
                                continue;
                            goto mrg3123;       // merge 1,2,3
                        } else {
mrg42:                      *pbx++ = *p2r++;    // run 2 smallest
                            if(p2r < p2e)       // if not end run continue
                                continue;
                            goto mrg3013;       // merge 0,1,3
                        }
                    } else {
                        if(*p0r <= *p3r){
                            goto mrg40;         // run 0 smallext
                        } else {
mrg43:                      *pbx++ = *p3r++;    // run 3 smallest
                            if(p3r < p3e)       // if not end run continue
                                continue;
                            goto mrg3012;       // merge 0,1,2
                        }
                    }
                } else {
                    if(*p2r <= *p3r){
                        if(*p1r <= *p2r){
mrg41:                      *pbx++ = *p1r++;    // run 1 smallest
                            if(p1r < p1e)       // if not end run continue
                                continue;
                            goto mrg3023;       // merge 0,2,3
                        } else {
                            goto mrg42;         // run 2 smallest
                        }
                    } else {
                        if(*p1r <= *p3r){
                            goto mrg41;         // run 1 smallest
                        } else {
                            goto mrg43;         // run 3 smallest
                        }
                    }
                }
            }
            // 3 way merge
mrg3123:    p0r = p1r;
            p0e = p1e;
mrg3023:    p1r = p2r;
            p1e = p2e;
mrg3013:    p2r = p3r;
            p2e = p3e;
mrg3012:    while(1){
                if(*p0r <= *p1r){
                    if(*p0r <= *p2r){
                        *pbx++ = *p0r++;        // run 0 smallest
                        if(p0r < p0e)           // if not end run continue
                            continue;
                        goto mrg212;            // merge 1,2
                    } else {
mrg32:                  *pbx++ = *p2r++;        // run 2 smallest
                        if(p2r < p2e)           // if not end run continue
                            continue;
                        goto mrg201;            // merge 0,1
                    }
                } else {
                    if(*p1r <= *p2r){
                        *pbx++ = *p1r++;        // run 1 smallest
                        if(p1r < p1e)           // if not end run continue
                            continue;
                        goto mrg202;            // merge 0,2
                    } else {
                        goto mrg32;             // run 2 smallest
                    }
                }
            }
            // 2 way merge
mrg212:     p0r = p1r;
            p0e = p1e;
mrg202:     p1r = p2r;
            p1e = p2e;
mrg201:     while(1){
                if(*p0r <= *p1r){
                    *pbx++ = *p0r++;            // run 0 smallest
                    if(p0r < p0e)               // if not end run continue
                        continue;
                    goto cpy11;
                } else {
                    *pbx++ = *p1r++;            // run 1 smallest
                    if(p1r < p1e)               // if not end run continue
                        continue;
                    goto cpy10;
                }
            }
            // 1 way copy
cpy11:      p0r = p1r;
            p0e = p1e;
cpy10:      while (1) {
                *pbx++ = *p0r++;                // copy element
                if (p0r < p0e)                  // if not end of run continue
                    continue;
                break;
            }
            pax += rsz << 2;            // setup for next set of runs
        }
        std::swap(a, b);                // swap ptrs
        rsz <<= 2;                      // quadruple run size
    }
    return a;                           // return sorted array
}

优化的合并排序比快速排序更快

1 个答案: