Question

我已经在Rcpp中实现了快速排序算法，但是对于大型数组，它的工作速度明显慢于sort(array, method="quick")。为什么？

这是我的Rcpp代码

// partition using hoare's scheme

#include <Rcpp.h>
using namespace Rcpp;

int partition(NumericVector a,int start,int end)
{
    double pivot = a[end];
    int i = start - 1;
    int j = end + 1;
    //Rcout << a <<"\n";
    while(1)
    {
        do {
            i++;
        } while (a[i] < pivot);

        do {
            j--;
        } while (pivot < a[j]);

        if(i > j)
            return j;

        //special.Swap(a, i, j);
        std::swap(a[i], a[j]);
    }
}

void qsort(NumericVector a,int start,int end)
{
  //Rcout << start <<"," << end <<"\n";
  if(start < end)
  {
    int P_index = partition(a, start, end);
    //Rcout << P_index << "\n";
    qsort(a, start, P_index);
    qsort(a, P_index + 1, end);
  }
}


// [[Rcpp::export]]
NumericVector QuickSortH_WC(NumericVector arr)
{
  int len = arr.size();
  qsort(arr, 0, len-1);
  //Rcout << arr <<"\n";
  return 1;
}

对于具有浮点值的数组，算法也更差。我想与hoare和lomuto分区方案进行比较，但是我不知道此实现是否在算法运行速度较慢方面没有任何缺陷。

Answer 1

代码效率低下的主要原因似乎是要比较的两种分区方案的混合。您声称使用了Hoare分区方案，并且代码看起来非常相似，但是pivot是根据Lomuto分区方案计算的。此外，如果有{{1}，则应返回j，而不是i >= j，则应返回。修复这两件事，并用稍微快一点的i > j代替i++：

++i

输出

// partition using hoare's scheme

#include <Rcpp.h>
using namespace Rcpp;

int partition(NumericVector a,int start,int end)
{
    double pivot = a[(start + end) / 2];
    int i = start - 1;
    int j = end + 1;
    //Rcout << a <<"\n";
    while(1)
    {
        do {
            ++i;
        } while (a[i] < pivot);

        do {
            --j;
        } while (pivot < a[j]);

        if(i >= j)
            return j;

        //special.Swap(a, i, j);
        std::swap(a[i], a[j]);
    }
}

void qsort(NumericVector a,int start,int end)
{
    //Rcout << start <<"," << end <<"\n";
    if(start < end)
    {
        int P_index = partition(a, start, end);
        //Rcout << P_index << "\n";
        qsort(a, start, P_index);
        qsort(a, P_index + 1, end);
    }
}


// [[Rcpp::export]]
NumericVector QuickSortH_WC(NumericVector arr)
{
    int len = arr.size();
    qsort(arr, 0, len-1);
    //Rcout << arr <<"\n";
    return arr;
}

/*** R
set.seed(42)
dat <- runif(1e6)
bench::mark(QuickSortH_WC(dat), sort(dat, method="quick"))
*/

因此，尽管此方法比R's sort慢约7倍，但它的运行时间至少具有可比的数量级。（感谢@JosephWood挖掘出链接）。 Wikipedia列出了这两种模式的更多改进。

顺便说一句，我还更改了包装器函数以返回更改后的数组。这使我可以使用默认行为> bench::mark(QuickSortH_WC(dat), sort(dat, method="quick")) # A tibble: 2 x 13 expression min median `itr/sec` mem_alloc `gc/sec` n_itr <bch:expr> <bch:> <bch:t> <dbl> <bch:byt> <dbl> <int> 1 QuickSortH_WC(dat) 95.7ms 100.5ms 8.63 2.49KB 43.2 5 2 sort(dat, method = "quick") 15ms 16.5ms 53.1 11.44MB 23.6 27 # … with 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>, # memory <list>, time <list>, gc <list> Warning message: Some expressions had a GC in every iteration; so filtering is disabled.来比较返回的结果。我觉得有用...

Answer 2

Rcpp 严重地应用了递归函数。我建议迭代快速排序实现：

void _Quick_sorti( double _array[],int _l,int _h){
    int *_stack=new int [_h-_l+1]; double _tmp;int _i,_p,_top=-1;
    _stack[++_top]=_l;_stack[++_top]=_h;
    while(_top>=0){
      _h=_stack[_top--];_l=_stack[_top--];
      _tmp=_array[_h];
      _i=_l-1;
      for(int _j=_l;_j<=_h-1;_j++){
        if(_array[_j]<=_tmp){_i++;std::swap(_array[_i],_array[_j]);}
      }
      _p=_i+1;
      std::swap(_array[_p],_array[_h]);
      if(_p-1>_l){_stack[++_top]=_l;_stack[++_top]=_p-1;}
      if(_p+1<_h){_stack[++_top]=_p+1;_stack[++_top]=_h;}
    }
  delete _stack;
  }


     // [[Rcpp::export]]
SEXP Quick_sorti(SEXP &unsorted) { //run
  SEXP temp=clone(unsorted);// or Rf_duplicate
  double *z=REAL(temp);
  int N=LENGTH(temp)-1;
  int k=0;
  _Quick_sorti(z,k,N); // note that we have provide lvalue (if we put 0 it will not works int place of N)
  return temp;}

该代码是从包含'_'前缀的宏改编而成的，而且使用R内部，看起来很难看。添加堆栈意味着需要增加N个内存。

为什么在rcpp中实现的快速排序工作缓慢？

2 个答案: