
时间:2014-01-14 11:43:58

标签: random cuda




// Fill the device array A (nr_rows_A * nr_cols_A floats) with uniformly
// distributed pseudo-random numbers generated on the GPU by cuRAND.
//
// Parameters:
//   A          pointer handed to curandGenerateUniform as the output buffer;
//              it must be able to hold nr_rows_A * nr_cols_A floats.
//   nr_rows_A  number of rows of the matrix.
//   nr_cols_A  number of columns of the matrix.
//
// Nothing is generated when either dimension is not positive. cuRAND failures
// are reported on stderr; the generator is released on every path.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // Nothing to fill; also keeps a negative product from turning into a
    // huge unsigned element count below.
    if (nr_rows_A <= 0 || nr_cols_A <= 0)
        return;

    // Multiply in size_t so that large matrices do not overflow int.
    const size_t count = static_cast<size_t>(nr_rows_A) * static_cast<size_t>(nr_cols_A);

    // Create a pseudo-random number generator
    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "GPU_fill_rand: curandCreateGenerator failed (status "
                  << static_cast<int>(status) << ")" << std::endl;
        return;
    }

    // Set the seed for the random number generator using clock().
    // NOTE(review): clock() reports processor time used by the program so far,
    // so seeds may repeat between runs - consider time() if that matters.
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "GPU_fill_rand: curandSetPseudoRandomGeneratorSeed failed (status "
                  << static_cast<int>(status) << ")" << std::endl;
    } else {
        // Fill the array with random numbers on the device
        status = curandGenerateUniform(prng, A, count);
        if (status != CURAND_STATUS_SUCCESS) {
            std::cerr << "GPU_fill_rand: curandGenerateUniform failed (status "
                      << static_cast<int>(status) << ")" << std::endl;
        }
    }

    // Release the generator; the original code never destroyed it.
    status = curandDestroyGenerator(prng);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "GPU_fill_rand: curandDestroyGenerator failed (status "
                  << static_cast<int>(status) << ")" << std::endl;
    }
}

#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call; on failure prints the error string together
// with the source location on stderr and terminates the program.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        const cudaError_t cuda_check_status_ = (call);                       \
        if (cuda_check_status_ != cudaSuccess) {                             \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__     \
                      << ": " << cudaGetErrorString(cuda_check_status_)      \
                      << std::endl;                                          \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)
#endif

// Demo driver: allocates a Height x Width float matrix on host and device,
// prints the zero-initialised host copy, fills the device copy with random
// numbers through GPU_fill_rand, copies it back and prints the result.
// Returns 0 on success; exits with EXIT_FAILURE when an allocation or a CUDA
// runtime call fails.
int main(void)
{
    // Variables declaration
    float *hst_Mat = NULL;   // host copy of the matrix
    float *dev_Mat = NULL;   // device copy of the matrix

    const int Height = 3;
    const int Width  = 10;
    const int vSize = Height * Width;
    const size_t mSize = sizeof(float) * vSize;

    hst_Mat = (float *)malloc(mSize);
    if (hst_Mat == NULL) {
        std::cerr << "Host allocation of " << mSize << " bytes failed" << std::endl;
        return EXIT_FAILURE;
    }
    CUDA_CHECK(cudaMalloc((void **)&dev_Mat, mSize));

    memset(hst_Mat, 0, mSize);
    CUDA_CHECK(cudaMemset(dev_Mat, 0, mSize));

    // Print initial matrix
    std::cout << " * Initial matrix : " << std::endl << "\t";
    for (int i = 0; i < Height; i++) {
        for (int j = 0; j < Width; j++) {
            std::cout << "\t" << hst_Mat[i * Width + j];
        }
        // End of row: start the next one on a fresh, indented line
        std::cout << std::endl << "\t";
    }
    std::cout << std::endl << std::endl;

    // Generate the random numbers directly in device memory
    GPU_fill_rand(dev_Mat, Height, Width);

    // Retrieving data from device; the blocking copy also reports errors
    // raised by the work queued before it.
    CUDA_CHECK(cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost));

    // Print result matrix
    std::cout << " * Result matrix : " << std::endl << "     ";
    for (int i = 0; i < Height; i++) {
        for (int j = 0; j < Width; j++) {
            std::cout << "   " << hst_Mat[i * Width + j];
        }
        std::cout << std::endl << "     ";
    }
    std::cout << std::endl << std::endl;

    free(hst_Mat);
    CUDA_CHECK(cudaFree(dev_Mat));

    // Keeps the console window open (Windows shell command)
    system("pause");

    return 0;
}



1 个答案:

答案 0 :(得分:2)


#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>

#include <conio.h>
#include <curand.h>

using namespace std;

// Integer division of a by b that adds one to the truncated quotient whenever
// the division leaves a remainder (used to size a grid so it covers a items
// with blocks of b threads).
int iDivUp(int a, int b)
{
    const int quotient = a / b;
    const bool has_remainder = (a % b) != 0;
    if (has_remainder)
        return quotient + 1;
    return quotient;
}

// Fill the device array A (nr_rows_A * nr_cols_A floats) with uniformly
// distributed pseudo-random numbers generated on the GPU by cuRAND.
//
// Parameters:
//   A          pointer handed to curandGenerateUniform as the output buffer;
//              it must be able to hold nr_rows_A * nr_cols_A floats.
//   nr_rows_A  number of rows of the matrix.
//   nr_cols_A  number of columns of the matrix.
//
// Nothing is generated when either dimension is not positive. cuRAND failures
// are reported on stderr; the generator is released on every path.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // Nothing to fill; also keeps a negative product from turning into a
    // huge unsigned element count below.
    if (nr_rows_A <= 0 || nr_cols_A <= 0)
        return;

    // Multiply in size_t so that large matrices do not overflow int.
    const size_t count = static_cast<size_t>(nr_rows_A) * static_cast<size_t>(nr_cols_A);

    // Create a pseudo-random number generator
    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "GPU_fill_rand: curandCreateGenerator failed (status "
                  << static_cast<int>(status) << ")" << std::endl;
        return;
    }

    // Seed the generator from clock().
    // NOTE(review): clock() reports processor time used by the program so far,
    // so seeds may repeat between runs - consider time() if that matters.
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "GPU_fill_rand: curandSetPseudoRandomGeneratorSeed failed (status "
                  << static_cast<int>(status) << ")" << std::endl;
    } else {
        // Fill the array with random numbers on the device
        status = curandGenerateUniform(prng, A, count);
        if (status != CURAND_STATUS_SUCCESS) {
            std::cerr << "GPU_fill_rand: curandGenerateUniform failed (status "
                      << static_cast<int>(status) << ")" << std::endl;
        }
    }

    // Release the generator; the original code never destroyed it.
    status = curandDestroyGenerator(prng);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "GPU_fill_rand: curandDestroyGenerator failed (status "
                  << static_cast<int>(status) << ")" << std::endl;
    }
}

// Rescales every element of A in place with the affine map
//     A[i] = (b - a) * A[i] + a
// so that inputs in (0, 1] (the range cuRAND's uniform generator produces)
// end up in (a, b].
//
// Launch layout: 1D grid of 1D blocks. Any grid size is valid because each
// thread strides over the array; no shared memory is used.
//
// Parameters:
//   A          array of nr_rows_A * nr_cols_A floats, read and written.
//   a, b       bounds of the target interval.
//   nr_rows_A  number of rows of the matrix.
//   nr_cols_A  number of columns of the matrix.
__global__ void generate_in_a_b(float *A, float a, float b, int nr_rows_A, int nr_cols_A) {

    // 64-bit arithmetic keeps the element count and the flat index from
    // overflowing int; a non-positive count simply skips the loop.
    const long long count  = (long long)nr_rows_A * (long long)nr_cols_A;
    const long long stride = (long long)gridDim.x * (long long)blockDim.x;

    for (long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         idx < count;
         idx += stride) {
        A[idx] = (b - a) * A[idx] + a;
    }
}


#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call; on failure prints the error string together
// with the source location on stderr and terminates the program.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        const cudaError_t cuda_check_status_ = (call);                       \
        if (cuda_check_status_ != cudaSuccess) {                             \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__     \
                      << ": " << cudaGetErrorString(cuda_check_status_)      \
                      << std::endl;                                          \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)
#endif

// Demo driver: fills a Height x Width device matrix with uniform random
// numbers via GPU_fill_rand, rescales them into the interval bounded by a and
// b with the generate_in_a_b kernel, copies the result to the host and prints
// it. Returns 0 on success; exits with EXIT_FAILURE when an allocation, a CUDA
// runtime call or the kernel launch fails.
int main(void)
{
    float *hst_Mat = NULL;   // host copy of the matrix
    float *dev_Mat = NULL;   // device copy of the matrix

    const int Height = 3;
    const int Width  = 10;
    const int vSize = Height * Width;
    const size_t mSize = sizeof(float) * vSize;

    hst_Mat = (float *)malloc(mSize);
    if (hst_Mat == NULL) {
        std::cerr << "Host allocation of " << mSize << " bytes failed" << std::endl;
        return EXIT_FAILURE;
    }
    CUDA_CHECK(cudaMalloc((void **)&dev_Mat, mSize));

    memset(hst_Mat, 0, mSize);
    CUDA_CHECK(cudaMemset(dev_Mat, 0, mSize));

    // Uniform random numbers, generated directly in device memory
    GPU_fill_rand(dev_Mat, Height, Width);

    // One thread per element, rounded up to whole blocks of 32 threads
    dim3 threads(32);
    dim3 blocks(iDivUp(Height * Width, 32));

    // Bounds of the target interval
    float a = 3.f;
    float b = 7.f;

    // Rescale the generated numbers in place. This launch was missing from
    // the listing, which left threads, blocks, a and b unused.
    generate_in_a_b<<<blocks, threads>>>(dev_Mat, a, b, Height, Width);
    CUDA_CHECK(cudaGetLastError());        // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());   // errors raised while the kernel ran

    CUDA_CHECK(cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost));

    std::cout << " * Result matrix : " << std::endl << "     ";
    for (int i = 0; i < Height; i++) {
        for (int j = 0; j < Width; j++) {
            std::cout << "   " << hst_Mat[i * Width + j];
        }
        // End of row: start the next one on a fresh, indented line
        std::cout << std::endl << "     ";
    }
    std::cout << std::endl << std::endl;

    free(hst_Mat);
    CUDA_CHECK(cudaFree(dev_Mat));

    // Keeps the console window open (Windows shell command)
    system("pause");

    return 0;
}


请参照 "What is the canonical way to check for errors using the CUDA runtime API?" 一文，为代码添加 CUDA 错误检查。
