在CUDA中将数组从主机复制到设备

时间:2017-02-26 12:37:55

标签: cuda

我是CUDA的新手,当我尝试将阵列从主机复制到设备时出现错误。 错误:没有重载功能的实例" cudaMalloc"匹配参数listargument类型是:(int(*)[1048576],unsigned long)

#include <assert.h>
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <time.h>
#include <unistd.h>
#include <curand.h>
#include <curand_kernel.h>



#define N (1024*1024)
  #define M (1000000)

/**************************************************/
/* this GPU kernel function is used to initialize the random states */
__global__ void init(unsigned int seed, curandState_t* states) {

    /* we have to initialize the state */
    curand_init(seed, /* the seed can be the same for each core, here we pass the time in from the CPU */
                blockIdx.x, /* the sequence number should be different for each core (unless you want all
                               cores to get the same sequence of numbers for some reason - use thread id! */
                0, /* the offset is how much extra we advance in the sequence for each call, can be 0 */
                &states[blockIdx.x]);
}

/* this GPU kernel takes an array of states, and an array of ints, and puts a random int into each */
__global__ void randoms(curandState_t* states, unsigned int* numbers) {
    /* curand works like rand - except that it takes a state as a parameter */
    numbers[blockIdx.x] = curand(&states[blockIdx.x]) %2000;
};

/*******************************************************/

  __global__ void cudakernel(int *buf)
  {
     int i = threadIdx.x + blockIdx.x * blockDim.x;
    // buf[i] = rand();
     for(int j = 0; j < M; j++)
        buf[i] = buf[i] * buf[i] - 0.25f;
  }

  int main()

  {
/*****************************************************/
/* CUDA's random number library uses curandState_t to keep track of the seed value
       we will store a random state for every thread  */
    curandState_t* states;

    /* allocate space on the GPU for the random states */
    cudaMalloc((void**) &states, N * sizeof(curandState_t));

    /* invoke the GPU to initialize all of the random states */
    init<<<N, 1>>>(time(0), states);

    /* allocate an array of unsigned ints on the CPU and GPU */
   // unsigned int cpu_nums[N];//getting error in median relared to type of int
    unsigned int* gpu_nums;
    int cpu_nums[N];
    cudaMalloc((void**) &gpu_nums, N * sizeof(unsigned int));

    /* invoke the kernel to get some random numbers */
    randoms<<<N, 1>>>(states, gpu_nums);

    /* copy the random numbers back */
    cudaMemcpy(cpu_nums, gpu_nums, N * sizeof(unsigned int), cudaMemcpyDeviceToHost);

/******************************************************************************/ 

     int data[N];// int count = 0;

     int cpunums[N],i;
     for (i=0;i<=N;i++)

     cpunums[i]=cpu_nums[i];


     cudaMalloc(&cpunums, N * sizeof(int));
     cudakernel<<<N/256, 256>>>(cpunums);
     cudaMemcpy(data, cpunums, N * sizeof(int), cudaMemcpyDeviceToHost);
     cudaFree(cpunums); 

     int sel;
     printf("Enter an index: ");
     scanf("%d", &sel);
     printf("data[%d] = %f\n", sel, data[sel]);
  }

我正在尝试将cpunums [i]数组从主机复制到设备agine,我会从设备中生成一个随机数字。

我试着调用设备功能但是我遇到了很多错误。所以我试过这种方式。

1 个答案:

答案 0 :(得分:1)

cudaMalloc 功能代表在设备的全局内存上分配缓冲区。 您要做的是主机到设备的复制,因此您必须使用 cudaMemcpy 功能:

  

主机 cudaError_t cudaMemcpy(void * dst,const void * src,size_t count,cudaMemcpyKind kind)

     

将src指向的内存区域中的计数字节复制到   dst指向的内存区域,其中kind指定方向   副本

请参阅:CUDA Runtime API Documentation : cudaMemcpy

显然,cudaMemcpy将在设备端写入的缓冲区必须先使用cudaMalloc进行分配。