我正在尝试在 CUDA 中使用 cuRAND 库,只想为每个线程生成一个随机整数。以下是我得到的输出(显然并不随机):
84
84
84
84
84
5
请检查我的代码并告诉我我做错了什么,我试着弄清楚为什么这不起作用......
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cuda.h>
#include <curand.h>
#include <curand_kernel.h>
/**
 * Draws one float from the cuRAND state stored at globalState[ind].
 *
 * The state is copied into a local variable, advanced by a single
 * curand_uniform() call, and written back to the same slot so that the next
 * draw from that slot continues the sequence instead of repeating it.
 *
 * @param globalState array of generator states; slot `ind` is read and written
 * @param ind         index of the state slot to use
 * @return the value produced by curand_uniform() for that state
 */
__device__ float generate(curandState* globalState, int ind)
{
    curandState* slot = globalState + ind;
    curandState rng = *slot;                    // work on a local copy of the state
    const float sample = curand_uniform(&rng);
    *slot = rng;                                // persist the advanced state
    return sample;
}
/**
 * Initializes one cuRAND state per thread.
 *
 * Every thread calls curand_init() with the shared `seed`, its own
 * threadIdx.x as the second (sequence) argument and 0 as the third (offset)
 * argument, writing the result into state[threadIdx.x].
 *
 * NOTE(review): the slot is chosen from threadIdx.x alone (blockIdx is not
 * used), so all blocks of a multi-block launch write the same slots, and
 * `state` must hold at least blockDim.x entries; there is no bounds check.
 *
 * @param state array of generator states to initialize
 * @param seed  seed value passed unchanged to curand_init()
 */
__global__ void setup_kernel ( curandState * state, unsigned long seed )
{
    const int lane = threadIdx.x;
    curand_init(seed, lane, 0, state + lane);
}
/**
 * Generates one random integer per index below N, prints it, and accumulates
 * the total into y[0].
 *
 * Each thread starts at its global index and advances by the total number of
 * launched threads (a grid-stride loop), so any launch configuration covers
 * all N indices. For every index it draws a float via generate(), multiplies
 * it by 1000000, truncates the product to int, prints the value from the
 * device, and atomically adds it to y[0].
 *
 * @param N           number of indices (and state slots) to process
 * @param y           device buffer; only y[0] is updated, via atomicAdd
 * @param globalState per-index generator states consumed by generate()
 */
__global__ void addToCount(int N, int *y, curandState* globalState)
{
    for (int idx = blockIdx.x * blockDim.x + threadIdx.x;
         idx < N;
         idx += blockDim.x * gridDim.x)
    {
        // The float product is truncated by the conversion to int.
        const int scaled = generate(globalState, idx) * 1000000;
        printf("%i\n", scaled);
        atomicAdd(y, scaled);
    }
}
// Host driver exactly as posted in the question: allocates N ints and N
// curandState slots on the device, launches addToCount, and prints y[0].
// This is the version that produces the non-random output shown above; it is
// kept unchanged and only annotated.
int main(void)
{
int N = 5;
int *y, *d_y;
// NOTE(review): y is never written before it is copied to the device below,
// so d_y (including d_y[0], the atomicAdd accumulator) starts with
// indeterminate contents.
y = (int*)malloc(N*sizeof(int));
cudaMalloc(&d_y, N * sizeof(int));
cudaMemcpy(d_y, y, N * sizeof(int), cudaMemcpyHostToDevice);
curandState* devStates;
cudaMalloc (&devStates, N * sizeof(curandState));
// NOTE(review): setup_kernel is never launched, so curand_init() has not run
// on devStates; addToCount reads the states exactly as cudaMalloc left them.
addToCount<<<2, 5>>>(N, d_y, devStates);
// None of the CUDA calls above or below have their return codes checked.
cudaMemcpy(y, d_y, N*sizeof(int), cudaMemcpyDeviceToHost);
// Prints only y[0], the accumulated total.
printf("%i\n", *y);
}
答案 0 :(得分:1)
问题在于你从未调用 setup_kernel,所以 addToCount 使用的 curandState 没有经过 curand_init 初始化。请将 main 更改为:
/** Prints a readable message and exits if a CUDA runtime call failed. */
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Host driver: seeds one cuRAND state per index, lets addToCount generate and
 * print N random integers, then prints their sum (accumulated in y[0]).
 *
 * Requires <time.h> for the time-based seed.
 */
int main(void)
{
    const int N = 5;
    int *y, *d_y;

    y = (int*)malloc(N * sizeof(int));
    if (y == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }

    checkCuda(cudaMalloc(&d_y, N * sizeof(int)), "cudaMalloc(d_y)");
    // addToCount accumulates into d_y[0] with atomicAdd, so the buffer must
    // start at zero rather than at whatever malloc/cudaMalloc returned.
    checkCuda(cudaMemset(d_y, 0, N * sizeof(int)), "cudaMemset(d_y)");

    curandState* devStates;
    checkCuda(cudaMalloc(&devStates, N * sizeof(curandState)), "cudaMalloc(devStates)");

    /** ADDED: the step missing from the question's code **/
    // Without this launch curand_init() never runs and addToCount reads
    // uninitialized generator states. setup_kernel indexes by threadIdx.x
    // only, so a single block of N threads initializes each slot exactly once.
    const unsigned long seed = (unsigned long)time(NULL);
    setup_kernel<<<1, N>>>(devStates, seed);
    checkCuda(cudaGetLastError(), "setup_kernel launch");
    /** END ADDITION **/

    addToCount<<<2, 5>>>(N, d_y, devStates);
    checkCuda(cudaGetLastError(), "addToCount launch");
    // Surfaces errors raised while the kernels ran.
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    checkCuda(cudaMemcpy(y, d_y, N * sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(y)");
    printf("%i\n", *y);

    cudaFree(devStates);
    cudaFree(d_y);
    free(y);
    return 0;
}
使用默认选项编译并运行,结果正常(每次运行的前五行是各线程生成的随机数,最后一行是它们的总和):
nvcc /tmp/so.cu -o /tmp/so
$ /tmp/so
900981
469952
494161
31968
880329
2777391
$ /tmp/so
525835
742594
750423
117137
66318
2202307
$ /tmp/so
919262
60838
89868
57696
770764
1898428