推力操作空主机阵列

时间:2014-12-09 11:48:50

标签: cuda thrust

我想做一些推力操作,但我不确定究竟是怎么做的。

现在,我收到的数组中充满了零(h_a数组)

我有:

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime_api.h>

#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>


template <typename T>
struct square
{
    __host__ __device__
    T operator()( const T& x ) const
    {
        return x * x;
    }

};


int
main(
             int argc,
    const char * argv[] )
{
    const size_t NbOfPoints  = 256;

    int BlocksPerGridX    = 16;
    int BlocksPerGridY    = 16;

    int ThreadsPerBlockX  = 16;
    int ThreadsPerBlockY  = 16;

    // generate random data on the host
    thrust::host_vector<float> h_Kx ( NbOfPoints );
    thrust::generate( h_Kx.begin(), h_Kx.end(), rand );

    thrust::host_vector<float> h_Ky ( NbOfPoints );
    thrust::generate( h_Ky.begin(), h_Ky.end(), rand );

    // transfer to device
    thrust::device_vector<float> dev_Kx = h_Kx;
    thrust::device_vector<float> dev_Ky = h_Ky;

    // create arrays for holding the number of threads per block in each dimension
    int * X , * Y;
    cudaMalloc((void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X) );
    cudaMalloc((void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y) );

    // wrap raw pointer with a device_ptr
    thrust::device_ptr<int> dev_X ( X );
    thrust::device_ptr<int> dev_Y ( Y );

    // use device_ptr in Thrust algorithms
    thrust::fill( dev_X, dev_X + ( ThreadsPerBlockX * BlocksPerGridX ) , (int) 0 );
    thrust::fill( dev_Y, dev_Y + ( ThreadsPerBlockY * BlocksPerGridY ) , (int) 0 );

    // setup arguments
    square<float> square_op;

    // create various vectors
    thrust::device_vector<int> distX ( NbOfPoints );
    thrust::device_vector<int> distY ( NbOfPoints );
    thrust::device_vector<unsigned int> Tmp ( NbOfPoints );
    thrust::host_vector<unsigned int> h_a ( NbOfPoints );
    thrust::device_vector<unsigned int> distXSquared ( NbOfPoints );
    thrust::device_vector<unsigned int> distYSquared ( NbOfPoints );


    // compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
    thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
    thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

    //square distances
    thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
    thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );

    // compute Tmp =  distX + distY
    thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
    thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );


    for ( int i = 0; i < 5; i ++ )
        printf("\n temp = %u",h_a[ i ] );


return 0;
}

更新:

除了Robert Crovella的编辑,您必须编辑为整数:

square<int> square_op;
thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );

1 个答案:

答案 0 :(得分:2)

你有几个做零长度变换的例子:

thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );

由于上述每个变换的前两个参数相同,因此正在完成的工作为零。大概你希望相应的.end()迭代器在第二个位置而不是.begin()

当我进行这些更改时,我打印出非零值。它们非常大,但你似乎正在对大值进行平方,所以我不确定你的意图是什么。