当我调用curand时,我总是在一个线程中获得相同的数字。然而,它们对于每个线程是不同的。我在下一个代码中做错了什么?
#define MAXTHREADS 2
#define NBBLOCKS 2
__global__ void testRand ( curandState * state, int nb ){
int id = threadIdx.x + blockIdx.x * blockDim.x;
int value;
for (int i=0;i<nb;i++){
curandState localState = state[id];
value = curand(&localState);
printf("Id %i, value %i\n",id,value);
}
}
__global__ void setup_kernel ( curandState * state, unsigned long seed )
{
int id = threadIdx.x + blockIdx.x * blockDim.x;
curand_init ( seed, id , 0, &state[id] );
}
/**
* Image comes in in horizontal lines
*/
void findOptimum() {
const dim3 blockSize(MAXTHREADS);
const dim3 gridSize(NBBLOCKS);
curandState* devStates;
cudaMalloc ( &devStates,MAXTHREADS*NBBLOCKS*sizeof( curandState ) );
time_t t;
time(&t);
setup_kernel <<< gridSize, blockSize >>> ( devStates, (unsigned long) t );
int nb = 4;
testRand <<< gridSize, blockSize >>> ( devStates,nb);
testRand <<< gridSize, blockSize >>> ( devStates,nb);
cudaFree(devStates);
}
输出:
Id 0, value -1075808309
Id 1, value -1660353324
Id 2, value 1282291714
Id 3, value -1892750252
Id 0, value -1075808309
Id 1, value -1660353324
Id 2, value 1282291714
Id 3, value -1892750252
...
这重复了几次。
答案 0 :(得分:1)
正如talonmies指出的那样,我没有修改全球状态。
在state[id] = localState
行后添加curand(localState)
解决了问题。