如何用curand生成唯一的随机整数?

时间:2019-09-12 07:49:15

标签: random cuda integer unique

我需要使用CUDA在范围(A,B)内生成N个互不相同的随机整数。我希望它们服从均匀分布,但我不确定这是否与"每个数字必须唯一"的要求相冲突。

此问题之前有人提过,但没有得到任何回答,只有一些编码方面的提示。

如何在一个间隔内不重复地生成固定数量的唯一随机整数?

我的如下尝试会生成随机数,但它们不是唯一的。

#include <stdio.h>
#include <curand.h>
#include <curand_kernel.h>
#include <math.h>
#include <assert.h>

// Seeds one cuRAND generator state per thread.
//
// Launch layout: indexed by threadIdx.x only, so every block of a launch
// would write the same entries; it is meant to be launched as one 1-D block.
//
// state: array that receives the initialized states; entry threadIdx.x is
//        written by each thread, so it needs at least blockDim.x entries.
// seed:  seed shared by all threads; each thread passes its own index as the
//        sequence number and 0 as the offset.
__global__ void setup_kernel ( curandState * state, unsigned long seed )
{
    const int tid = threadIdx.x;
    curandState *slot = state + tid;
    curand_init ( seed, tid, 0, slot );
}

// Writes one random integer per thread into result[], scaled into the range
// described by *min and *max.
//
// Launch layout: indexed by threadIdx.x only, so a single 1-D block is
// expected. Threads with index >= count do nothing.
//
// globalState: per-thread cuRAND states; entry ind is read, advanced by one
//              draw and written back.
// result:      output array; entries [0, count) are written.
// max, min:    pointers to the upper and lower bound, dereferenced on the device.
// count:       number of values to produce.
//
// Note: curand_uniform returns a value in (0.0, 1.0], so the truncated result
// lies in [*min, *max], with *max produced only when the draw is exactly 1.0.
// Values produced by different threads are independent and may repeat.
__global__ void generate( curandState* globalState, int * result, int *max, int *min, int count )
{
    int ind = threadIdx.x;

    // Guard before any memory access: threads past `count` must not touch
    // globalState or result, which may hold only `count` entries.
    if (ind >= count)
        return;

    curandState localState = globalState[ind];
    float RANDOM = curand_uniform( &localState );
    globalState[ind] = localState;

    const int lo = *min;
    const int hi = *max;

    // Same arithmetic as before; the float -> int conversion is now explicit.
    result[ind] = (int)truncf(lo + (hi - lo)*RANDOM);
}

#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call and, on failure, prints the error and returns 1
// from the enclosing function. Only meant to be used inside main().
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            return 1;                                                      \
        }                                                                  \
    } while (0)
#endif

// Generates N random integers between MIN and MAX on the GPU and prints them.
// The values are NOT guaranteed to be unique: every thread draws independently.
// Returns 0 on success, 1 if a CUDA call or host allocation fails.
int main( int argc, char** argv) 
{
    int N = 32; // number of random numbers to generate (also the block size)

    int MIN = 10; // lower bound of the random range
    int MAX = 100; // upper bound of the random range

    dim3 tpb(N,1,1);
    curandState* devStates;
    CUDA_CHECK(cudaMalloc ( &devStates, N*sizeof( curandState ) ));

    // setup seeds
    setup_kernel <<< 1, tpb >>> ( devStates, time(NULL) );
    CUDA_CHECK(cudaGetLastError()); // kernel launches report config errors only here

    int *d_result, *h_result;

    CUDA_CHECK(cudaMalloc(&d_result, N * sizeof(int)));
    h_result = (int *)malloc(N * sizeof(int));
    if (h_result == NULL) {
        fprintf(stderr, "host allocation of %d ints failed\n", N);
        return 1;
    }

    int *d_max, *d_min;

    CUDA_CHECK(cudaMalloc(&d_max, sizeof(int)));
    CUDA_CHECK(cudaMalloc(&d_min, sizeof(int)));

    // The bounds already live in host variables, so copy them directly
    // instead of staging them through extra heap allocations.
    CUDA_CHECK(cudaMemcpy(d_max, &MAX, sizeof(int), cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_min, &MIN, sizeof(int), cudaMemcpyHostToDevice));

    // generate random numbers
    generate <<< 1, tpb >>> ( devStates, d_result, d_max, d_min, N );
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: waits for the kernels above and reports any error they
    // raised. The buffer holds ints, so size it with sizeof(int).
    CUDA_CHECK(cudaMemcpy(h_result, d_result, N * sizeof(int), cudaMemcpyDeviceToHost));

    for (int i = 0; i < N; i++)
        printf("random number= %d\n", h_result[i]);

    // Release host and device resources.
    free(h_result);
    CUDA_CHECK(cudaFree(d_min));
    CUDA_CHECK(cudaFree(d_max));
    CUDA_CHECK(cudaFree(d_result));
    CUDA_CHECK(cudaFree(devStates));

    return 0;
}

20, 39, 43, 72, 39, 70, 58, 31, 44, 47, 30, 26, 42, 35, 20, 66, 94, 81, 42(repeated), 50, 90, 31(repeated), 51, 53, 39(repeated), 20, 66, 37, 42(repeated), 21, 45, 57

1 个答案:

答案 0 :(得分:1)

下面是一种可能的方法,不过其效率可能比评论中提到的 Fisher-Yates 洗牌算法(Fisher-Yates shuffle)低得多:

  1. 确定待选整数范围的长度(B-A)。使用CURAND生成一组数量等于该长度的随机数。

  2. 以这组随机数为键、以待选整数范围的序列为值,进行按键排序(例如thrust::sort_by_key),从而将整数序列随机重排。

  3. 从该序列中获取前N个数字(其中N是所需的要生成的随机整数个数),作为您选择的值。

当待选整数范围的长度(B-A)所对应的内存需求超过GPU所能容纳的范围时,这种方法显然不可行。Thrust的按键排序需要O(N)的临时存储,因此当"整数个数 × 8字节"超过可用GPU内存的约40%时,该方法将变得不可行。

这种方法的优点是借助常用库实现起来相对简单;缺点是效率可能比专家编写的F-Y洗牌算法低得多。不过就我所见,F-Y洗牌算法要求:

  • 所需序列(A,B)中的所有整数都驻留在内存中
  • 需要生成一组随机数,其数量至少为(B-A)个
  • 可以使用全局同步

这是一个例子:

$ cat t1504.cu
#include <stdio.h>
#include <curand.h>
#include <curand_kernel.h>
#include <math.h>
#include <assert.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>

// Initializes the first n cuRAND states, one state per thread.
//
// Launch layout: 1-D grid of 1-D blocks; threads whose flat index is >= n
// return without touching memory.
//
// state: array that receives the initialized states (entries [0, n)).
// seed:  seed shared by all threads; the flat thread index is used as the
//        sequence number, with offset 0.
// n:     number of states to initialize.
__global__ void setup_kernel ( curandState * state, unsigned long seed, int n)
{
    const int gid = blockIdx.x*blockDim.x + threadIdx.x;
    if (gid >= n)
        return;

    curand_init ( seed, gid, 0, state + gid );
}

// Draws one uniform float per thread and stores it in result[].
//
// Launch layout: 1-D grid of 1-D blocks; threads whose flat index is >= count
// return without touching memory.
//
// globalState: per-thread cuRAND states; each used entry is advanced by one
//              draw and written back.
// result:      output array; entries [0, count) are written.
// count:       number of values to produce.
__global__ void generate( curandState* globalState, float * result, int count )
{
    const int gid = blockIdx.x*blockDim.x + threadIdx.x;
    if (gid >= count)
        return;

    // Work on a local copy of the state, then store it back.
    curandState rng = globalState[gid];
    const float sample = curand_uniform( &rng );
    globalState[gid] = rng;

    result[gid] = sample;
}

#ifndef CUDA_CHECK
// Evaluates a CUDA runtime call and, on failure, prints the error and returns 1
// from the enclosing function. Only meant to be used inside main().
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            return 1;                                                      \
        }                                                                  \
    } while (0)
#endif

// Prints N distinct random integers taken from MIN..MAX-1.
//
// Method: generate one random float key per candidate integer, sort the
// candidates by those keys, and take the first N of the reordered sequence.
// Returns 0 on success, 1 on invalid parameters or a failed CUDA call.
int main( int argc, char** argv)
{
    int N = 32; // number of unique random integers to output

    int MIN = 10; // lower bound of the range (inclusive)
    int MAX = 100; // upper bound of the range (exclusive: candidates are MIN..MAX-1)

    int R = MAX-MIN; // number of candidate integers

    // N unique values can only be drawn from a range holding at least N values;
    // an empty range would also produce an invalid zero-block launch.
    if (R <= 0 || N < 0 || N > R) {
        fprintf(stderr, "cannot pick %d unique integers from a range of %d values\n", N, R);
        return 1;
    }

    curandState* devStates;
    CUDA_CHECK(cudaMalloc ( &devStates, R*sizeof( curandState ) ));

    // setup seeds
    setup_kernel <<< (R+255)/256, 256 >>> ( devStates, time(NULL), R );
    CUDA_CHECK(cudaGetLastError()); // kernel launches report config errors only here

    float *d_result;

    CUDA_CHECK(cudaMalloc(&d_result, R * sizeof(float)));

    // generate one random key per candidate integer
    generate <<< (R+255)/256, 256>>> ( devStates, d_result, R );
    CUDA_CHECK(cudaGetLastError());

    // Candidate integers MIN, MIN+1, ..., MIN+R-1.
    thrust::device_vector<int> d_r(R);
    thrust::sequence(d_r.begin(), d_r.end(), MIN);
    thrust::device_ptr<float> dp_res = thrust::device_pointer_cast(d_result);

    // Sorting the candidates by their random keys reorders them randomly.
    thrust::sort_by_key(dp_res, dp_res+R, d_r.begin());

    // Only the first N values are needed on the host.
    thrust::host_vector<int> h_result(d_r.begin(), d_r.begin() + N);

    for (int i = 0; i < N; i++)
        printf("random number= %d\n", h_result[i]);

    // Release the raw device allocations (the thrust vectors free themselves).
    CUDA_CHECK(cudaFree(d_result));
    CUDA_CHECK(cudaFree(devStates));

    return 0;
}
$ nvcc -o t1504 t1504.cu -lcurand
[user2@dc10 misc]$ ./t1504
random number= 16
random number= 97
random number= 31
random number= 80
random number= 61
random number= 21
random number= 98
random number= 70
random number= 46
random number= 41
random number= 30
random number= 71
random number= 52
random number= 92
random number= 48
random number= 39
random number= 59
random number= 63
random number= 96
random number= 40
random number= 81
random number= 32
random number= 34
random number= 79
random number= 73
random number= 49
random number= 19
random number= 24
random number= 11
random number= 78
random number= 42
random number= 12
$