CUDA Thrust:仅当转换结果满足谓词时才复制该结果

时间:2016-08-31 03:04:21

标签: cuda thrust

我想对输入的 thrust::device_vector 执行转换,并且只有当转换结果满足谓词时才将其复制到输出向量。因此,结果的数量可能小于输入 device_vector 的大小(类似于 thrust::copy_if 的输出向量)。我还没有找到用 thrust::transform_if 实现这一点的方法。目前,我可以先使用 thrust::transform 再使用 thrust::remove_if 来完成此操作,如下例所示:

#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <iostream>

// Integer bit-mixing function: scrambles a 32-bit value through six
// add/xor/shift rounds. Callable from host and device code.
//
// a: value to scramble (e.g. an element index used as an RNG seed).
// Returns the scrambled value; all arithmetic is unsigned and wraps.
__host__ __device__ unsigned int hash(unsigned int a) {
  const unsigned int r1 = (a  + 0x7ed55d16u) + (a  << 12);
  const unsigned int r2 = (r1 ^ 0xc761c23cu) ^ (r1 >> 19);
  const unsigned int r3 = (r2 + 0x165667b1u) + (r2 << 5);
  const unsigned int r4 = (r3 + 0xd3a2646cu) ^ (r3 << 9);
  const unsigned int r5 = (r4 + 0xfd7046c5u) + (r4 << 3);
  const unsigned int r6 = (r5 ^ 0xb55a4f09u) ^ (r5 >> 16);
  return r6;
}

// Binary functor for thrust::transform: adds a pseudo-random offset to x.
//
// The offset is reproducible per element: the engine is seeded with
// hash(n), so the same index n always yields the same offset.
struct add_random {
  __host__ __device__ add_random() {}

  // n: element index, used only to seed the random engine.
  // x: input value.
  // Returns x plus one draw from uniform_int_distribution<int>(0, 11).
  //
  // Marked __host__ __device__ (like hash()) so the functor can be used with
  // host-side iterators as well as device ones.
  __host__ __device__ int operator()(const int n, const int x) const {
    thrust::default_random_engine rng(hash(n));
    thrust::uniform_int_distribution<int> uniform(0, 11);
    return uniform(rng)+x;
  }
};

// Unary predicate: true when x is strictly greater than 6.
// Used with thrust::remove_if to discard such elements.
struct is_greater {
  // const-qualified so the predicate can be invoked through const objects
  // and const references; it holds no state.
  __host__ __device__ bool operator()(const int x) const {
    return x > 6;
  }
};

// Two-pass demo: transform the device vector in place, print it, then
// compact it with remove_if and print the surviving prefix.
int main(void) {
  // Host seed data, copied into device memory.
  const int host_values[5] = {10, 2, 5, 3, 0};
  thrust::device_vector<int> d_x(host_values, host_values + 5);

  // Pass 1: d_x[i] = add_random()(i, d_x[i]), written back over the input.
  const thrust::counting_iterator<int> index_first(0);
  const thrust::counting_iterator<int> index_last(5);
  thrust::transform(index_first, index_last, d_x.begin(), d_x.begin(), add_random());

  std::ostream_iterator<int> o(std::cout, " ");

  std::cout << "after adding random number:" << std::endl;
  thrust::copy(d_x.begin(), d_x.end(), o);
  std::cout << std::endl;

  // Pass 2: drop every element for which is_greater() returns true.
  // remove_if returns the end of the kept range; d_x itself is not resized.
  thrust::device_vector<int>::iterator new_end =
      thrust::remove_if(d_x.begin(), d_x.end(), is_greater());

  std::cout << "after removing values greater than 6:" << std::endl;
  thrust::copy(d_x.begin(), new_end, o);
  std::cout << std::endl;

  return 0;
}

给出了输出:

after adding random number:
18 4 8 7 11 
after removing values greater than 6:
4 

我希望避免将结果复制到内存中两次,首先是thrust::transform,然后是thrust::remove_if,如上例所示。是否可以通过单个转换函数获得上述输出?我怎样才能做到这一点?我最关心的是计算成本,所以任何优化的解决方案,即使它不使用Thrust库也会很棒。

1 个答案:

答案 0 :(得分:4)

欢迎来到 Thrust 花式迭代器(fancy iterator)的世界。通过查看 Thrust 快速入门指南(quick start guide),您可以快速了解其中一些花式迭代器类型。特别是,Thrust 的变换迭代器(transform iterator)常常可以用来替代作用于另一个 Thrust 算法输入上的 thrust::transform 操作,从而将两个算法“融合”为一次操作。

以下是适用于您案例的实用示例:

$ cat t1254.cu
#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <iostream>

// Integer bit-mixing function: scrambles a 32-bit value through six
// add/xor/shift rounds. Callable from host and device code.
//
// a: value to scramble (used by add_random as an RNG seed).
// Returns the scrambled value; all arithmetic is unsigned and wraps.
__host__ __device__ unsigned int hash(unsigned int a) {
  a += 0x7ed55d16 + (a << 12);
  a ^= 0xc761c23c ^ (a >> 19);
  a += 0x165667b1 + (a << 5);
  a  = (a + 0xd3a2646c) ^ (a << 9);  // mixed add/xor round: no compound form
  a += 0xfd7046c5 + (a << 3);
  a ^= 0xb55a4f09 ^ (a >> 16);
  return a;
}

// Unary functor for thrust::transform_iterator over a zipped (index, value)
// pair: adds a pseudo-random offset to the value.
//
// The offset is reproducible per element: the engine is seeded with
// hash(index), so the same index always yields the same offset.
struct add_random {
  // Nested typedefs that adaptable-functor consumers look up. They are
  // declared directly instead of being inherited from thrust::unary_function,
  // which newer Thrust/CCCL releases deprecate.
  typedef thrust::tuple<int, int> argument_type;
  typedef int result_type;

  // t: tuple of (element index, input value).
  // Returns the input value plus one draw from
  // uniform_int_distribution<int>(0, 11).
  __host__ __device__ int operator()(thrust::tuple<int, int> t) const {
    const int n = thrust::get<0>(t);  // index: only seeds the engine
    const int x = thrust::get<1>(t);  // value being offset
    thrust::default_random_engine rng(hash(n));
    thrust::uniform_int_distribution<int> uniform(0, 11);
    return uniform(rng)+x;
  }
};

// Keep-predicate for thrust::copy_if: true when x should be KEPT, i.e. when
// x is NOT greater than 6.
//
// NOTE: despite its name, this is the logical complement of an
// "x > 6" removal predicate. copy_if keeps elements for which the predicate
// is true, whereas remove_if discards them, so the condition is negated.
// The boundary value 6 is kept, matching remove_if with "x > 6".
struct is_greater {
  // const-qualified so the predicate can be invoked through const objects
  // and const references; it holds no state.
  __host__ __device__ bool operator()(const int x) const {
    return !(x > 6);
  }
};

// Fused demo: the add_random transform is applied lazily through a
// transform_iterator over zip(index, value), and copy_if writes only the
// accepted results to d_r. The transformed values are never stored in d_x.
int main(void) {
  int x[5] = {10, 2, 5, 3, 0};
  thrust::device_vector<int> d_x(x, x+5);
  thrust::device_vector<int> d_r(5);  // sized for the case where all pass

  // copy_if returns the end of the written range in d_r. Elements are kept
  // when is_greater() returns true for the transformed value.
  thrust::device_vector<int>::iterator r_end = thrust::copy_if(
      thrust::make_transform_iterator(
          thrust::make_zip_iterator(
              thrust::make_tuple(thrust::counting_iterator<int>(0), d_x.begin())),
          add_random()),
      thrust::make_transform_iterator(
          thrust::make_zip_iterator(
              thrust::make_tuple(thrust::counting_iterator<int>(5), d_x.end())),
          add_random()),
      d_r.begin(),
      is_greater());
  int rsize = r_end - d_r.begin();  // number of elements that were kept

  std::cout << "after removing values greater than 6:" << std::endl;
  thrust::copy_n(d_r.begin(), rsize, std::ostream_iterator<int>(std::cout, " "));
  std::cout << std::endl;

  return 0;
}
$ nvcc -o t1254 t1254.cu
$ ./t1254
after removing values greater than 6:
4
$
  1. 我们用作用于同样两个输入的变换迭代器(transform iterator)替换了您的 thrust::transform 操作。由于您的转换操作有两个输入,我们使用 zip 迭代器将它们组合成一个元组序列,并相应地重写了转换函子,使其接受该元组作为输入。

  2. 将 remove_if 改为 copy_if,以便能够以变换迭代器作为输入。由于 remove_if 删除满足谓词的元素,而 copy_if 保留满足谓词的元素,因此复制谓词的逻辑需要相应取反。