我想用 Thrust 库做一些运算,但我不确定具体应该怎么做。
目前,我得到的结果数组(h_a 数组)里全是零。
我有:
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>
/// Unary function object that multiplies its argument by itself.
/// The call operator is qualified for both host and device compilation.
template <typename T>
struct square
{
    /// @param value Operand to be squared.
    /// @return The product value * value, expressed as a T.
    __host__ __device__
    T operator()( const T& value ) const
    {
        const T squared = value * value;
        return squared;
    }
};
int
main(
int argc,
const char * argv[] )
{
const size_t NbOfPoints = 256;
int BlocksPerGridX = 16;
int BlocksPerGridY = 16;
int ThreadsPerBlockX = 16;
int ThreadsPerBlockY = 16;
// generate random data on the host
thrust::host_vector<float> h_Kx ( NbOfPoints );
thrust::generate( h_Kx.begin(), h_Kx.end(), rand );
thrust::host_vector<float> h_Ky ( NbOfPoints );
thrust::generate( h_Ky.begin(), h_Ky.end(), rand );
// transfer to device
thrust::device_vector<float> dev_Kx = h_Kx;
thrust::device_vector<float> dev_Ky = h_Ky;
// create arrays for holding the number of threads per block in each dimension
int * X , * Y;
cudaMalloc((void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X) );
cudaMalloc((void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y) );
// wrap raw pointer with a device_ptr
thrust::device_ptr<int> dev_X ( X );
thrust::device_ptr<int> dev_Y ( Y );
// use device_ptr in Thrust algorithms
thrust::fill( dev_X, dev_X + ( ThreadsPerBlockX * BlocksPerGridX ) , (int) 0 );
thrust::fill( dev_Y, dev_Y + ( ThreadsPerBlockY * BlocksPerGridY ) , (int) 0 );
// setup arguments
square<float> square_op;
// create various vectors
thrust::device_vector<int> distX ( NbOfPoints );
thrust::device_vector<int> distY ( NbOfPoints );
thrust::device_vector<unsigned int> Tmp ( NbOfPoints );
thrust::host_vector<unsigned int> h_a ( NbOfPoints );
thrust::device_vector<unsigned int> distXSquared ( NbOfPoints );
thrust::device_vector<unsigned int> distYSquared ( NbOfPoints );
// compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );
//square distances
thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );
// compute Tmp = distX + distY
thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );
for ( int i = 0; i < 5; i ++ )
printf("\n temp = %u",h_a[ i ] );
return 0;
}
更新:
除了 Robert Crovella 指出的修改之外,还必须把相关的模板类型改为整数:
// Squaring functor instantiated for int rather than float.
square<int> square_op;
// Element-wise subtraction over the full [begin(), end()) range with an integer minus functor.
thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );
答案 0 :(得分:2)
你的代码中有几处变换的长度为零:
// First and last are both begin(), so the input range is empty and nothing is written to distX / distY.
thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );
和
// First and last are both distXSquared.begin(), so the input range is empty and Tmp is never written.
thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
由于上述每个变换的前两个参数相同(都是 .begin()),输入范围为空,因此实际上没有处理任何元素。你大概是想在第二个参数的位置传入相应的 .end() 迭代器,而不是 .begin()。
做了这些修改之后,程序打印出了非零值。这些值非常大,不过你似乎是在对很大的数求平方,所以我不确定你的本意是什么。