我是CUDA的新手,当我尝试将阵列从主机复制到设备时出现错误。 错误:没有重载功能的实例" cudaMalloc"匹配参数listargument类型是:(int(*)[1048576],unsigned long)
#include <assert.h>
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <time.h>
#include <unistd.h>
#include <curand.h>
#include <curand_kernel.h>
#define N (1024*1024)
#define M (1000000)
/**************************************************/
/* this GPU kernel function is used to initialize the random states */
__global__ void init(unsigned int seed, curandState_t* states) {
/* we have to initialize the state */
curand_init(seed, /* the seed can be the same for each core, here we pass the time in from the CPU */
blockIdx.x, /* the sequence number should be different for each core (unless you want all
cores to get the same sequence of numbers for some reason - use thread id! */
0, /* the offset is how much extra we advance in the sequence for each call, can be 0 */
&states[blockIdx.x]);
}
/* this GPU kernel takes an array of states, and an array of ints, and puts a random int into each */
__global__ void randoms(curandState_t* states, unsigned int* numbers) {
/* curand works like rand - except that it takes a state as a parameter */
numbers[blockIdx.x] = curand(&states[blockIdx.x]) %2000;
};
/*******************************************************/
__global__ void cudakernel(int *buf)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
// buf[i] = rand();
for(int j = 0; j < M; j++)
buf[i] = buf[i] * buf[i] - 0.25f;
}
int main()
{
/*****************************************************/
/* CUDA's random number library uses curandState_t to keep track of the seed value
we will store a random state for every thread */
curandState_t* states;
/* allocate space on the GPU for the random states */
cudaMalloc((void**) &states, N * sizeof(curandState_t));
/* invoke the GPU to initialize all of the random states */
init<<<N, 1>>>(time(0), states);
/* allocate an array of unsigned ints on the CPU and GPU */
// unsigned int cpu_nums[N];//getting error in median relared to type of int
unsigned int* gpu_nums;
int cpu_nums[N];
cudaMalloc((void**) &gpu_nums, N * sizeof(unsigned int));
/* invoke the kernel to get some random numbers */
randoms<<<N, 1>>>(states, gpu_nums);
/* copy the random numbers back */
cudaMemcpy(cpu_nums, gpu_nums, N * sizeof(unsigned int), cudaMemcpyDeviceToHost);
/******************************************************************************/
int data[N];// int count = 0;
int cpunums[N],i;
for (i=0;i<=N;i++)
cpunums[i]=cpu_nums[i];
cudaMalloc(&cpunums, N * sizeof(int));
cudakernel<<<N/256, 256>>>(cpunums);
cudaMemcpy(data, cpunums, N * sizeof(int), cudaMemcpyDeviceToHost);
cudaFree(cpunums);
int sel;
printf("Enter an index: ");
scanf("%d", &sel);
printf("data[%d] = %f\n", sel, data[sel]);
}
我正在尝试将cpunums [i]数组从主机复制到设备agine,我会从设备中生成一个随机数字。
我试着调用设备功能但是我遇到了很多错误。所以我试过这种方式。
答案 0 :(得分:1)
cudaMalloc 功能代表在设备的全局内存上分配缓冲区。 您要做的是主机到设备的复制,因此您必须使用 cudaMemcpy 功能:
主机 cudaError_t cudaMemcpy(void * dst,const void * src,size_t count,cudaMemcpyKind kind)
将src指向的内存区域中的计数字节复制到 dst指向的内存区域,其中kind指定方向 副本
请参阅:CUDA Runtime API Documentation : cudaMemcpy
显然,cudaMemcpy将在设备端写入的缓冲区必须先使用cudaMalloc进行分配。