我已经实现了以下quickselect算法来实现中位数选择的O(n)
复杂度(更常见的是第k个最小数字):
static size_t partition(struct point **points_ptr, size_t points_size, size_t pivot_idx)
{
const double pivot_value = points_ptr[pivot_idx]->distance;
/* Move pivot to the end. */
SWAP(points_ptr[pivot_idx], points_ptr[points_size - 1], struct point *);
/* Perform the element moving. */
size_t border_idx = 0;
for (size_t i = 0; i < points_size - 1; ++i) {
if (points_ptr[i]->distance < pivot_value) {
SWAP(points_ptr[border_idx], points_ptr[i], struct point *);
border_idx++;
}
}
/* Move pivot to act as a border element. */
SWAP(points_ptr[border_idx], points_ptr[points_size - 1], struct point *);
return border_idx;
}
static struct point * qselect(struct point **points_ptr, size_t points_size, size_t k)
{
const size_t pivot_idx = partition(points_ptr, points_size, rand() % points_size);
if (k == pivot_idx) { //k lies on the same place as a pivot
return points_ptr[pivot_idx];
} else if (k < pivot_idx) { //k lies on the left of the pivot
//points_ptr remains the same
points_size = pivot_idx;
//k remains the same
} else { //k lies on the right of the pivot
points_ptr += pivot_idx + 1;
points_size -= pivot_idx + 1;
k -= pivot_idx + 1;
}
return qselect(points_ptr, points_size, k);
}
然后我尝试将它与glibc的qsort()
与O(nlog(n))
进行比较,并对其卓越的性能感到惊讶。这是测量代码:
double wtime;
wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
wtime -= omp_get_wtime();
qsort(points_ptr, points_size, sizeof (*points_ptr), compar_distance);
wtime += omp_get_wtime();
}
printf("qsort took %f\n", wtime);
wtime = 0.0;
for (size_t i = 0; i < 1000; ++i) {
qsort(points_ptr, points_size, sizeof (*points_ptr), compar_rand);
wtime -= omp_get_wtime();
qselect(points_ptr, points_size, points_size / 2);
wtime += omp_get_wtime();
}
printf("qselect took %f\n", wtime);
对于10000个元素的数组,结果类似于qsort took 0.280432
,qselect took 8.516676
。为什么quicksort比quickselect更快?
答案 0 :(得分:1)
第一个明显的答案是:也许qsort没有实现quicksort。 自从我阅读标准以来已经有一段时间了,但我认为没有任何要求qsort()执行快速排序。
第二:现有的C标准库经常被大量优化(例如,在可用的情况下使用特殊的汇编指令)。结合现代CPU的复杂性能特征,这很可能导致O(n log n) - 快速排序不是 - 算法比O(n)算法快。
我的猜测是你搞乱了缓存 - valgrind / cachegrind可以告诉你的东西。
答案 1 :(得分:1)
感谢您的建议,我对quickselect实施的问题在于它展示了inputs that contain many repeated elements的最坏情况复杂度O(n^2)
,这是我的情况。 Glibc的qsort()
(默认使用mergesort)在此处不显示O(n^2)
。
我已经修改了我的partition()
函数来执行基本的3向分区,并且中间的3对于快速选择很有效:
/** \breif Quicksort's partition procedure.
*
* In linear time, partition a list into three parts: less than, greater than
* and equals to the pivot, for example input 3 2 7 4 5 1 4 1 will be
* partitioned into 3 2 1 1 | 5 7 | 4 4 4 where 4 is the pivot.
* Modified version of the median-of-three strategy is implemented, it ends with
* a median at the end of an array (this saves us one or two swaps).
*/
static void partition(struct point **points_ptr, size_t points_size,
size_t *less_size, size_t *equal_size)
{
/* Modified median-of-three and pivot selection. */
struct point **first_ptr = points_ptr;
struct point **middle_ptr = points_ptr + (points_size / 2);
struct point **last_ptr = points_ptr + (points_size - 1);
if ((*first_ptr)->distance > (*last_ptr)->distance) {
SWAP(*first_ptr, *last_ptr, struct point *);
}
if ((*first_ptr)->distance > (*middle_ptr)->distance) {
SWAP(*first_ptr, *middle_ptr, struct point *);
}
if ((*last_ptr)->distance > (*middle_ptr)->distance) { //reversed
SWAP(*last_ptr, *middle_ptr, struct point *);
}
const double pivot_value = (*last_ptr)->distance;
/* Element swapping. */
size_t greater_idx = 0;
size_t equal_idx = points_size - 1;
size_t i = 0;
while (i < equal_idx) {
const double elem_value = points_ptr[i]->distance;
if (elem_value < pivot_value) {
SWAP(points_ptr[greater_idx], points_ptr[i], struct point *);
greater_idx++;
i++;
} else if (elem_value == pivot_value) {
equal_idx--;
SWAP(points_ptr[i], points_ptr[equal_idx], struct point *);
} else { //elem_value > pivot_value
i++;
}
}
*less_size = greater_idx;
*equal_size = points_size - equal_idx;
}
/** A selection algorithm to find the kth smallest element in an unordered list.
*/
static struct point * qselect(struct point **points_ptr, size_t points_size,
size_t k)
{
size_t less_size;
size_t equal_size;
partition(points_ptr, points_size, &less_size, &equal_size);
if (k < less_size) { //k lies in the less-than-pivot partition
points_size = less_size;
} else if (k < less_size + equal_size) { //k lies in the equals-to-pivot partition
return points_ptr[points_size - 1];
} else { //k lies in the greater-than-pivot partition
points_ptr += less_size;
points_size -= less_size + equal_size;
k -= less_size + equal_size;
}
return qselect(points_ptr, points_size, k);
}
结果确实是线性的并且优于qsort()
(我使用了@IVlad所建议的Fisher-Yates shuffle,所以绝对qsort()
次更糟糕了:
array size qsort qselect speedup
1000 0.044678 0.008671 5.152328
5000 0.248413 0.045899 5.412160
10000 0.551095 0.096064 5.736730
20000 1.134857 0.191933 5.912773
30000 2.169177 0.278726 7.782467