CUDA:用区间[a, b]内的随机值填充矩阵

时间:2014-01-14 11:43:58

标签: random cuda

在一个CUDA应用程序中,我需要用区间[a, b]内的随机值填充一个矩阵。

我使用了网上已有的、基于CURAND的代码,但我不知道如何修改它,使其生成a和b之间的值。

代码如下:

// Fill the device array A (nr_rows_A x nr_cols_A floats) with uniformly
// distributed random numbers using the cuRAND host API.
//
//   A          device pointer with room for nr_rows_A * nr_cols_A floats
//   nr_rows_A  number of matrix rows
//   nr_cols_A  number of matrix columns
//
// The values are whatever curandGenerateUniform produces (the unit interval);
// rescale them afterwards if another interval is needed. Nothing is written
// when a dimension is not positive. cuRAND failures are reported on stderr.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // A non-positive dimension would turn into a huge unsigned count below.
    if (nr_rows_A <= 0 || nr_cols_A <= 0)
        return;

    // Multiply in size_t so large matrices do not overflow int.
    const size_t count = (size_t) nr_rows_A * (size_t) nr_cols_A;

    // Create a pseudo-random number generator
    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS)
    {
        std::cerr << "cuRAND error " << status << " while creating the generator" << std::endl;
        return;
    }

    // Set the seed for the random number generator using the system clock
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());

    // Fill the array with random numbers on the device
    if (status == CURAND_STATUS_SUCCESS)
        status = curandGenerateUniform(prng, A, count);

    if (status != CURAND_STATUS_SUCCESS)
        std::cerr << "cuRAND error " << status << " while filling the matrix" << std::endl;

    // Release the generator; the original leaked one generator per call.
    curandDestroyGenerator(prng);
}


// Print a diagnostic for a failed CUDA runtime call.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;

    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demo driver: allocates a Height x Width matrix on host and device, prints
// the zero-initialised host copy, fills the device copy with random numbers
// through GPU_fill_rand, copies it back and prints it.
// Returns 0 on success and 1 when an allocation or a CUDA call fails.
int main(void)
{
    // Variables declaration
    const int Height = 3 ;
    const int Width  = 10 ;
    const int vSize  = Height*Width ;
    const size_t mSize = sizeof(float)*vSize ;

    float *hst_Mat = (float *)malloc(mSize) ;
    if (hst_Mat == NULL)
    {
        std::cerr << "Host allocation of " << mSize << " bytes failed" << std::endl ;
        return 1 ;
    }

    float *dev_Mat = NULL ;
    if (!cudaCallSucceeded(cudaMalloc((void**)&dev_Mat, mSize), "cudaMalloc"))
    {
        free(hst_Mat) ;
        return 1 ;
    }

    memset(hst_Mat, 0, mSize) ;
    bool ok = cudaCallSucceeded(cudaMemset(dev_Mat, 0, mSize), "cudaMemset") ;

    // Print initial matrix
    std::cout << " * Initial matrix : " << std::endl << "\t" ;
    for (int row = 0 ; row < Height ; row++)
    {
        for (int col = 0 ; col < Width ; col++)
            std::cout << "\t" << hst_Mat[row*Width+col] ;
        std::cout << std::endl << "\t" ;
    }
    std::cout << std::endl << std::endl ;

    if (ok)
    {
        // Fill the device matrix with random numbers
        GPU_fill_rand(dev_Mat, Height, Width) ;

        // Retrieving data from device. cudaMemcpy is blocking, so errors from
        // the preceding asynchronous device work are reported here as well.
        ok = cudaCallSucceeded(
                cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost),
                "cudaMemcpy") ;
    }

    if (ok)
    {
        // Print result matrix
        std::cout << " * Result matrix : " << std::endl << "     " ;
        for (int row = 0 ; row < Height ; row++)
        {
            for (int col = 0 ; col < Width ; col++)
                std::cout << "   " << hst_Mat[row*Width+col] ;
            std::cout << std::endl << "     " ;
        }
        std::cout << std::endl << std::endl ;
    }

    // FREE MEMORY
    free(hst_Mat) ;
    if (!cudaCallSucceeded(cudaFree(dev_Mat), "cudaFree"))
        ok = false ;

    // Keeps the console window open; "pause" is a Windows shell command.
    system("pause") ;

    return ok ? 0 : 1 ;
}

但它生成的是[0, 1]区间内的实数(浮点)随机值。

怎么做?

1 个答案:

答案 0 :(得分:2)

试试此代码

#include <conio.h>
#include <curand.h>

#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>

using namespace std;

// Integer quotient a / b, increased by one whenever the division leaves a
// remainder. For positive operands this is the ceiling of a / b, i.e. the
// number of size-b blocks needed to cover a elements.
int iDivUp(int a, int b)
{
    const int quotient = a / b;
    if (a % b == 0)
        return quotient;
    return quotient + 1;
}

// Fill the device array A (nr_rows_A x nr_cols_A floats) with uniformly
// distributed random numbers using the cuRAND host API.
//
//   A          device pointer with room for nr_rows_A * nr_cols_A floats
//   nr_rows_A  number of matrix rows
//   nr_cols_A  number of matrix columns
//
// The values are whatever curandGenerateUniform produces (the unit interval);
// a separate step must rescale them to another interval. Nothing is written
// when a dimension is not positive. cuRAND failures are reported on stderr.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // A non-positive dimension would turn into a huge unsigned count below.
    if (nr_rows_A <= 0 || nr_cols_A <= 0)
        return;

    // Multiply in size_t so large matrices do not overflow int.
    const size_t count = (size_t) nr_rows_A * (size_t) nr_cols_A;

    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS)
    {
        std::cerr << "cuRAND error " << status << " while creating the generator" << std::endl;
        return;
    }

    // Seed from the processor clock so each run produces a different sequence.
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());

    if (status == CURAND_STATUS_SUCCESS)
        status = curandGenerateUniform(prng, A, count);

    if (status != CURAND_STATUS_SUCCESS)
        std::cerr << "cuRAND error " << status << " while filling the matrix" << std::endl;

    // Release the generator; the original leaked one generator per call.
    curandDestroyGenerator(prng);
}


// Rescale every element of A in place: A[i] = (b - a) * A[i] + a.
// When A holds values from the unit interval, the results lie between a and b.
//
//   A          device pointer to nr_rows_A * nr_cols_A floats (read and written)
//   a, b       lower and upper end of the target interval
//   nr_rows_A  number of matrix rows
//   nr_cols_A  number of matrix columns
//
// Launch layout: 1D grid of 1D blocks. Each thread walks the array with a
// stride of the total thread count, so any grid size is valid; with one thread
// per element each thread processes exactly one value. No shared memory used.
__global__ void generate_in_a_b(float *A, float a, float b, int nr_rows_A, int nr_cols_A) {

    // 64-bit arithmetic: the int product rows * cols can overflow for large
    // matrices. A non-positive total simply skips the loop.
    const long long total  = (long long) nr_rows_A * nr_cols_A;
    const long long stride = (long long) gridDim.x * blockDim.x;

    for (long long i = (long long) blockIdx.x * blockDim.x + threadIdx.x;
         i < total;
         i += stride)
    {
        A[i] = (b - a) * A[i] + a;
    }

}

// Print a diagnostic for a failed CUDA runtime call.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaStepOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;

    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demo driver: fills a Height x Width device matrix with random numbers via
// GPU_fill_rand, rescales them to the interval given by a and b with the
// generate_in_a_b kernel, copies the result to the host and prints it.
// Returns EXIT_SUCCESS, or EXIT_FAILURE when an allocation or CUDA call fails.
int main(void)
{
    const int Height = 3 ;
    const int Width  = 10 ;
    const int vSize  = Height*Width ;
    const size_t mSize = sizeof(float)*vSize ;

    float *hst_Mat = (float *)malloc(mSize) ;
    if (hst_Mat == NULL)
    {
        std::cerr << "Host allocation of " << mSize << " bytes failed" << std::endl ;
        return EXIT_FAILURE ;
    }

    float *dev_Mat = NULL ;
    if (!cudaStepOk(cudaMalloc((void**)&dev_Mat, mSize), "cudaMalloc"))
    {
        free(hst_Mat) ;
        return EXIT_FAILURE ;
    }

    memset(hst_Mat, 0, mSize) ;
    bool ok = cudaStepOk(cudaMemset(dev_Mat, 0, mSize), "cudaMemset") ;

    if (ok)
    {
        GPU_fill_rand(dev_Mat, Height, Width) ;

        // One thread per element, rounded up to whole blocks of 32 threads.
        const int threadsPerBlock = 32 ;
        dim3 threads(threadsPerBlock);
        dim3 blocks(iDivUp(vSize, threadsPerBlock));

        float a = 3.f;
        float b = 7.f;

        generate_in_a_b<<<blocks,threads>>>(dev_Mat,a,b,Height,Width);

        // A launch returns no status itself: configuration errors show up in
        // cudaGetLastError(), execution errors at the next synchronisation.
        ok = cudaStepOk(cudaGetLastError(), "generate_in_a_b launch")
          && cudaStepOk(cudaDeviceSynchronize(), "generate_in_a_b execution") ;
    }

    if (ok)
        ok = cudaStepOk(cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost),
                        "cudaMemcpy") ;

    if (ok)
    {
        std::cout << " * Result matrix : " << std::endl << "     " ;
        for (int row = 0 ; row < Height ; row++)
        {
            for (int col = 0 ; col < Width ; col++)
                std::cout << "   " << hst_Mat[row*Width+col] ;
            // End of row; the original indented this as if it were inside the inner loop.
            std::cout << std::endl << "     " ;
        }
        std::cout << std::endl << std::endl ;
    }

    free(hst_Mat) ;
    if (!cudaStepOk(cudaFree(dev_Mat), "cudaFree"))
        ok = false ;

    // Keeps the console window open; "pause" is a Windows shell command.
    system("pause") ;

    return ok ? EXIT_SUCCESS : EXIT_FAILURE ;
}

它将返回在区间[a,b]=[3.f,7.f]内均匀分布的随机数。

请参照问答"What is the canonical way to check for errors using the CUDA runtime API?",为上面的代码添加CUDA错误检查。