Question

我已经实现了以下quickselect算法来实现中位数选择的O(n)复杂度（更常见的是第k个最小数字）：

static size_t partition(struct point **points_ptr, size_t points_size, size_t pivot_idx)
{
    const double pivot_value = points_ptr[pivot_idx]->distance;

    /* Move pivot to the end. */
    SWAP(points_ptr[pivot_idx], points_ptr[points_size - 1], struct point *);

    /* Perform the element moving. */
    size_t border_idx = 0;
    for (size_t i = 0; i < points_size - 1; ++i) {
            if (points_ptr[i]->distance < pivot_value) {
                    SWAP(points_ptr[border_idx], points_ptr[i], struct point *);
                    border_idx++;
            }
    }

    /* Move pivot to act as a border element. */
    SWAP(points_ptr[border_idx], points_ptr[points_size - 1], struct point *);

    return border_idx;
}

static struct point * qselect(struct point **points_ptr, size_t points_size, size_t k)
{
    const size_t pivot_idx = partition(points_ptr, points_size, rand() % points_size);

    if (k == pivot_idx) { //k lies on the same place as a pivot
            return points_ptr[pivot_idx];
    } else if (k < pivot_idx) { //k lies on the left of the pivot
            //points_ptr remains the same
            points_size = pivot_idx;
            //k remains the same
    } else { //k lies on the right of the pivot
            points_ptr += pivot_idx + 1;
            points_size -= pivot_idx + 1;
            k -= pivot_idx + 1;
    }

    return qselect(points_ptr, points_size, k);
}

然后我尝试将它与glibc的qsort()与O(nlog(n))进行比较，并对其卓越的性能感到惊讶。这是测量代码：

double wtime;
wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
    qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
    wtime -= omp_get_wtime();
    qsort(points_ptr, points_size, sizeof (*points_ptr), compar_distance);
    wtime += omp_get_wtime();
}
printf("qsort took %f\n", wtime);

wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
    qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
    wtime -= omp_get_wtime();
    qselect(points_ptr, points_size, points_size / 2);
    wtime += omp_get_wtime();
}
printf("qselect took %f\n", wtime);

对于10000个元素的数组，结果类似于qsort took 0.280432，qselect took 8.516676。为什么quicksort比quickselect更快？

Answer 1

第一个明显的答案是：也许qsort没有实现quicksort。自从我阅读标准以来已经有一段时间了，但我认为没有任何要求qsort（）执行快速排序。

第二：现有的C标准库经常被大量优化（例如，在可用的情况下使用特殊的汇编指令）。结合现代CPU的复杂性能特征，这很可能导致O（n log n） - 快速排序不是 - 算法比O（n）算法快。

我的猜测是你搞乱了缓存 - valgrind / cachegrind可以告诉你的东西。

Answer 2

感谢您的建议，我对quickselect实施的问题在于它展示了inputs that contain many repeated elements的最坏情况复杂度O(n^2)，这是我的情况。 Glibc的qsort()（默认使用mergesort）在此处不显示O(n^2)。

我已经修改了我的partition()函数来执行基本的3向分区，并且中间的3对于快速选择很有效：

/** \breif Quicksort's partition procedure.                                  
 *                                                                           
 * In linear time, partition a list into three parts: less than, greater than
 * and equals to the pivot, for example input 3 2 7 4 5 1 4 1 will be        
 * partitioned into 3 2 1 1 | 5 7 | 4 4 4 where 4 is the pivot.              
 * Modified version of the median-of-three strategy is implemented, it ends with
 * a median at the end of an array (this saves us one or two swaps).         
 */                                                                          
static void partition(struct point **points_ptr, size_t points_size,
                      size_t *less_size, size_t *equal_size)
{                                                                            
    /* Modified median-of-three and pivot selection. */                      
    struct point **first_ptr = points_ptr;                                   
    struct point **middle_ptr = points_ptr + (points_size / 2);              
    struct point **last_ptr = points_ptr + (points_size - 1);                
    if ((*first_ptr)->distance > (*last_ptr)->distance) {                    
        SWAP(*first_ptr, *last_ptr, struct point *);                         
    }                                                                        
    if ((*first_ptr)->distance > (*middle_ptr)->distance) {                  
        SWAP(*first_ptr, *middle_ptr, struct point *);                       
    }                                                                        
    if ((*last_ptr)->distance > (*middle_ptr)->distance) { //reversed        
        SWAP(*last_ptr, *middle_ptr, struct point *);                        
    }                                                                        
    const double pivot_value = (*last_ptr)->distance;                      

    /* Element swapping. */                                                  
    size_t greater_idx = 0;                                                  
    size_t equal_idx = points_size - 1;                                      
    size_t i = 0;                                                            
    while (i < equal_idx) {                                                  
        const double elem_value = points_ptr[i]->distance;                   

        if (elem_value < pivot_value) {                                      
            SWAP(points_ptr[greater_idx], points_ptr[i], struct point *);    
            greater_idx++;                                                   
            i++;                                                             
        } else if (elem_value == pivot_value) {                              
            equal_idx--;                                                     
            SWAP(points_ptr[i], points_ptr[equal_idx], struct point *);      
        } else { //elem_value > pivot_value                                  
            i++;                                                             
        }                                                                    
    }                                                                        

    *less_size = greater_idx;                                                
    *equal_size = points_size - equal_idx;                                   
}

/** A selection algorithm to find the kth smallest element in an unordered list.
 */                                                                          
static struct point * qselect(struct point **points_ptr, size_t points_size,
                              size_t k)
{                                                                            
    size_t less_size;                                                        
    size_t equal_size;                                                       

    partition(points_ptr, points_size, &less_size, &equal_size);             

    if (k < less_size) { //k lies in the less-than-pivot partition           
        points_size = less_size;                                             
    } else if (k < less_size + equal_size) { //k lies in the equals-to-pivot partition
        return points_ptr[points_size - 1];                                  
    } else { //k lies in the greater-than-pivot partition                    
        points_ptr += less_size;                                             
        points_size -= less_size + equal_size;                               
        k -= less_size + equal_size;                                         
    }                                                                        

    return qselect(points_ptr, points_size, k);                              
}

结果确实是线性的并且优于qsort()（我使用了@IVlad所建议的Fisher-Yates shuffle，所以绝对qsort()次更糟糕了：

array size  qsort     qselect   speedup
1000        0.044678  0.008671  5.152328
5000        0.248413  0.045899  5.412160
10000       0.551095  0.096064  5.736730
20000       1.134857  0.191933  5.912773
30000       2.169177  0.278726  7.782467

第k个最小数字 - 快速选择比快速选择更快

2 个答案: