Question

我有一个2d数组，我想按行排序，这意味着如果数组是

 3     2     2     3     2     2     3     3     3     3
 3     3     2     2     2     2     3     3     2     2
 3     2     2     3     2     2     3     3     3     2
 2     2     2     2     2     2     2     2     2     2
 3     2     2     2     2     2     3     2     2     2
 2     2     2     2     2     2     2     2     2     2
 3     3     2     3     2     2     3     3     2     3
 3     3     2     2     2     2     3     3     3     3
 3     2     2     3     2     2     3     3     2     3
 3     3     2     3     2     2     3     3     3     3

我想拿数组

 2     2     2     2     2     2     2     2     2     2
 2     2     2     2     2     2     2     2     2     2
 3     2     2     2     2     2     3     2     2     2
 3     2     2     3     2     2     3     3     2     3
 3     2     2     3     2     2     3     3     3     2
 3     2     2     3     2     2     3     3     3     3
 3     3     2     2     2     2     3     3     2     2
 3     3     2     2     2     2     3     3     3     3
 3     3     2     3     2     2     3     3     2     3
 3     3     2     3     2     2     3     3     3     3

我在纯CUDA中检查了一些基数排序的实现，但它们看起来相当复杂。使用Thrust有没有相对简单的方法呢？

Answer 1

有可能在推力中做到这一点。一种可能的方法是创建一个自定义排序仿函数，遍历给它的行（让我们说通过传递给仿函数的索引来识别行），然后决定这些行的排序。

为了实现这一点，我们可以创建一个索引数组，每行一个索引，我们将对其进行排序。我们将根据给定的数据数组（使用命令行的自定义排序函数）对此索引数组进行排序。

最后，我们唯一排序的是索引数组，但现在按照需要重新排列行所需的顺序。

这是一个功能齐全的例子：

$ cat t631.cu
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>

#define DWIDTH 10

typedef int mytype;

struct my_sort_functor
{
  int my_width;
  mytype *my_data;
  my_sort_functor(int _my_width, mytype * _my_data): my_width(_my_width), my_data(_my_data) {};

  __host__ __device__
  bool operator()(const int idx1, const int idx2) const
    {
      bool flip = false;
      for (int col_idx = 0; col_idx < my_width; col_idx++){
        mytype d1 = my_data[(idx1*my_width)+col_idx];
        mytype d2 = my_data[(idx2*my_width)+col_idx];
        if (d1 > d2) break;
        if (d1 < d2) {flip = true; break;}
        }
      return flip;
    }
};

int main(){

  mytype data[] = {
    3,     2,     2,     3,     2,     2,     3,     3,     3,     3,
    3,     3,     2,     2,     2,     2,     3,     3,     2,     2,
    3,     2,     2,     3,     2,     2,     3,     3,     3,     2,
    2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
    3,     2,     2,     2,     2,     2,     3,     2,     2,     2,
    2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
    3,     3,     2,     3,     2,     2,     3,     3,     2,     3,
    3,     3,     2,     2,     2,     2,     3,     3,     3,     3,
    3,     2,     2,     3,     2,     2,     3,     3,     2,     3,
    3,     3,     2,     3,     2,     2,     3,     3,     3,     3 };

  int cols  = DWIDTH;
  int dsize = sizeof(data)/sizeof(mytype);
  int rows  = dsize/cols;
  thrust::host_vector<mytype>   h_data(data, data+dsize);
  thrust::device_vector<mytype> d_data = h_data;
  thrust::device_vector<int> idxs(rows);
  thrust::sequence(idxs.begin(), idxs.end());
  thrust::sort(idxs.begin(), idxs.end(), my_sort_functor(cols, thrust::raw_pointer_cast(d_data.data())));
  thrust::host_vector<int> h_idxs = idxs;

  for (int i = 0; i<rows; i++){
    thrust::copy(h_data.begin()+h_idxs[i]*cols, h_data.begin()+(h_idxs[i]+1)*cols, std::ostream_iterator<mytype>(std::cout, ", "));
    std::cout << std::endl;}
  return 0;
}

$ nvcc -o t631 t631.cu
$ ./t631
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 2, 2, 2, 2, 2, 3, 2, 2, 2,
3, 2, 2, 3, 2, 2, 3, 3, 2, 3,
3, 2, 2, 3, 2, 2, 3, 3, 3, 2,
3, 2, 2, 3, 2, 2, 3, 3, 3, 3,
3, 3, 2, 2, 2, 2, 3, 3, 2, 2,
3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 2, 3, 2, 2, 3, 3, 2, 3,
3, 3, 2, 3, 2, 2, 3, 3, 3, 3,
$

我非常确定如果数据可以以转置形式传递，并且重新排列代码以对列进行排序而不是行（即根据数据数组中的列对索引向量进行排序，这将非常有效）而不是行）。这对于由排序函数驱动的底层数据访问会更有效。

我省略了实际将行移动到新位置的步骤，但希望这应该是直截了当的。在输出结果的方法中暗示了一般方法，尽管如果需要可以通过单个推力调用来完成。

使用Thrust在Cuda中对2D数组进行排序

1 个答案: