在 CUDA 应用程序中，我需要用区间 [a, b] 内的随机值填充一个矩阵。
我使用了网上已有的、基于 cuRAND 的代码，但不知道如何修改它才能生成 a 和 b 之间的值。
代码如下:
// Fill the device array A, viewed as nr_rows_A x nr_cols_A floats, with
// uniformly distributed pseudo-random numbers generated by cuRAND.
//
// Parameters:
//   A          - device pointer with room for nr_rows_A * nr_cols_A floats.
//   nr_rows_A  - number of rows of A.
//   nr_cols_A  - number of columns of A.
//
// curandGenerateUniform produces values in (0, 1] (0 excluded, 1 included).
// If either dimension is not positive the function does nothing. cuRAND
// failures are reported on std::cerr; the generator is always released.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // Nothing to generate for an empty (or invalid) matrix.
    if (nr_rows_A <= 0 || nr_cols_A <= 0) {
        return;
    }

    // Compute the element count in size_t so large matrices do not overflow int.
    const size_t count = static_cast<size_t>(nr_rows_A) * static_cast<size_t>(nr_cols_A);

    // Create a pseudo-random number generator
    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "curandCreateGenerator failed with status "
                  << static_cast<int>(status) << std::endl;
        return;
    }

    // Seed the generator from clock(). clock() reports processor time used by
    // this process, so two runs can end up with the same seed.
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());

    // Fill the array with random numbers on the device
    if (status == CURAND_STATUS_SUCCESS) {
        status = curandGenerateUniform(prng, A, count);
    }
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "cuRAND generation failed with status "
                  << static_cast<int>(status) << std::endl;
    }

    // Release the generator; without this every call leaks its resources.
    status = curandDestroyGenerator(prng);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "curandDestroyGenerator failed with status "
                  << static_cast<int>(status) << std::endl;
    }
}
// Abort the program with a diagnostic when a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess) {
        std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
        exit(EXIT_FAILURE);
    }
}

// Print a rows x cols row-major host matrix preceded by a title line.
// `sep` is written before every element and at the start of every line.
static void printMatrix(const char *title, const float *M, int rows, int cols, const char *sep)
{
    std::cout << title << std::endl << sep;
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
            std::cout << sep << M[i * cols + j];
        std::cout << std::endl << sep;
    }
    std::cout << std::endl << std::endl;
}

// Demo: allocate a Height x Width matrix on host and device, fill the device
// copy with cuRAND uniform random numbers, copy it back and print it.
// Returns 0 on success; exits with EXIT_FAILURE if an allocation or a CUDA
// runtime call fails.
int main(void)
{
    // Matrix dimensions and sizes
    const int Height = 3;
    const int Width = 10;
    const int vSize = Height * Width;            // number of elements
    const size_t mSize = sizeof(float) * vSize;  // size in bytes

    // Host buffer, zero-initialised
    float *hst_Mat = (float *)malloc(mSize);
    if (hst_Mat == NULL) {
        std::cerr << "malloc failed" << std::endl;
        return EXIT_FAILURE;
    }
    memset(hst_Mat, 0, mSize);

    // Device buffer, zero-initialised
    float *dev_Mat = NULL;
    checkCuda(cudaMalloc((void**)&dev_Mat, mSize), "cudaMalloc");
    checkCuda(cudaMemset(dev_Mat, 0, mSize), "cudaMemset");

    // Print initial matrix
    printMatrix(" * Initial matrix : ", hst_Mat, Height, Width, "\t");

    // Fill the device matrix with random numbers (cuRAND host API)
    GPU_fill_rand(dev_Mat, Height, Width);

    // Retrieving data from device
    checkCuda(cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost), "cudaMemcpy");

    // Print result matrix
    printMatrix(" * Result matrix : ", hst_Mat, Height, Width, " ");

    // FREE MEMORY
    free(hst_Mat);
    checkCuda(cudaFree(dev_Mat), "cudaFree");

    // "pause" is a Windows cmd built-in that keeps the console window open.
    system("pause");
    return 0;
}
但它生成的是 0 到 1 之间的实数随机值。
怎样才能让它生成 [a, b] 区间内的值？
答案 0 :(得分:2)
试试此代码
#include <conio.h>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <curand.h>
#include <iostream>
using namespace std;
// Integer division of a by b, increased by one whenever the division leaves a
// remainder. For non-negative a and positive b this is the ceiling of a / b,
// e.g. the number of blocks of size b needed to cover a elements.
// b must be non-zero.
int iDivUp(int a, int b)
{
    int quotient = a / b;
    if (a % b != 0) {
        ++quotient;
    }
    return quotient;
}
// Fill the device array A, viewed as nr_rows_A x nr_cols_A floats, with
// uniformly distributed pseudo-random numbers generated by cuRAND.
//
// Parameters:
//   A          - device pointer with room for nr_rows_A * nr_cols_A floats.
//   nr_rows_A  - number of rows of A.
//   nr_cols_A  - number of columns of A.
//
// curandGenerateUniform produces values in (0, 1] (0 excluded, 1 included).
// If either dimension is not positive the function does nothing. cuRAND
// failures are reported on std::cerr; the generator is always released.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // Nothing to generate for an empty (or invalid) matrix.
    if (nr_rows_A <= 0 || nr_cols_A <= 0) {
        return;
    }

    // Compute the element count in size_t so large matrices do not overflow int.
    const size_t count = static_cast<size_t>(nr_rows_A) * static_cast<size_t>(nr_cols_A);

    // Create the pseudo-random generator (XORWOW).
    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "curandCreateGenerator failed with status "
                  << static_cast<int>(status) << std::endl;
        return;
    }

    // Seed from clock(). clock() reports processor time used by this process,
    // so two runs can end up with the same seed.
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());

    // Generate the numbers directly into device memory.
    if (status == CURAND_STATUS_SUCCESS) {
        status = curandGenerateUniform(prng, A, count);
    }
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "cuRAND generation failed with status "
                  << static_cast<int>(status) << std::endl;
    }

    // Release the generator; without this every call leaks its resources.
    status = curandDestroyGenerator(prng);
    if (status != CURAND_STATUS_SUCCESS) {
        std::cerr << "curandDestroyGenerator failed with status "
                  << static_cast<int>(status) << std::endl;
    }
}
// Rescale every element of A in place: A[i] <- (b - a) * A[i] + a.
// When A holds values in (0, 1] and a < b, the results lie in (a, b].
//
// Parameters:
//   A                     - device array of nr_rows_A * nr_cols_A floats.
//   a, b                  - lower and upper end of the target interval.
//   nr_rows_A, nr_cols_A  - matrix dimensions; only their product is used.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
// index and advances by the total number of launched threads, so any grid
// size processes all elements. No shared memory is used.
__global__ void generate_in_a_b(float *A, float a, float b, int nr_rows_A, int nr_cols_A) {
    // 64-bit arithmetic so the element count and the indices cannot overflow int.
    const long long total  = (long long)nr_rows_A * nr_cols_A;
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < total; i += stride) {
        A[i] = (b - a) * A[i] + a;
    }
}
// Abort the program with a diagnostic when a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess) {
        std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
        exit(EXIT_FAILURE);
    }
}

// Demo: fill a Height x Width device matrix with cuRAND uniform random
// numbers, rescale them to [a, b] = [3, 7] with generate_in_a_b, copy the
// result to the host and print it.
// Returns 0 on success; exits with EXIT_FAILURE if an allocation, a CUDA
// runtime call or the kernel launch fails.
int main(void)
{
    // Matrix dimensions and sizes
    const int Height = 3;
    const int Width = 10;
    const int vSize = Height * Width;            // number of elements
    const size_t mSize = sizeof(float) * vSize;  // size in bytes

    // Host buffer, zero-initialised
    float *hst_Mat = (float *)malloc(mSize);
    if (hst_Mat == NULL) {
        std::cerr << "malloc failed" << std::endl;
        return EXIT_FAILURE;
    }
    memset(hst_Mat, 0, mSize);

    // Device buffer, zero-initialised
    float *dev_Mat = NULL;
    checkCuda(cudaMalloc((void**)&dev_Mat, mSize), "cudaMalloc");
    checkCuda(cudaMemset(dev_Mat, 0, mSize), "cudaMemset");

    // Uniform random numbers from cuRAND
    GPU_fill_rand(dev_Mat, Height, Width);

    // One thread per element, rounded up to whole blocks.
    const int threadsPerBlock = 32;
    const dim3 threads(threadsPerBlock);
    const dim3 blocks(iDivUp(vSize, threadsPerBlock));

    // Target interval [a, b]
    const float a = 3.f;
    const float b = 7.f;
    generate_in_a_b<<<blocks, threads>>>(dev_Mat, a, b, Height, Width);
    // A kernel launch returns nothing; launch errors must be queried explicitly.
    checkCuda(cudaGetLastError(), "generate_in_a_b launch");

    // Blocking copy: waits for the kernel and reports errors raised while it ran.
    checkCuda(cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost), "cudaMemcpy");

    // Print result matrix
    cout << " * Result matrix : " << endl << " ";
    for (int i = 0; i < Height; i++)
    {
        for (int j = 0; j < Width; j++)
            cout << " " << hst_Mat[i * Width + j];
        cout << endl << " ";
    }
    cout << endl << endl;

    // Release host and device memory
    free(hst_Mat);
    checkCuda(cudaFree(dev_Mat), "cudaFree");

    // "pause" is a Windows cmd built-in that keeps the console window open.
    system("pause");
    return 0;
}
它将返回在区间 [a, b] = [3.f, 7.f]
内均匀分布的随机数。
请参照问答 “What is the canonical way to check for errors using the CUDA runtime API?” 为代码添加 CUDA 错误检查。