为了更方便地管理主机和设备上的内存,我创建了以下类。理论上,它应该负责主机与设备之间的双向数据复制。
// Owns two int buffers of the same length: one in host memory (memoryHost,
// allocated with new[]) and one in device memory (memoryDevice, allocated with
// cudaMalloc), and copies data between them on request.
//
// The object is the sole owner of both buffers, so copying is disabled: a
// member-wise copy would make two destructors release the same pointers.
// Pass CudaArray by reference or pointer.
struct CudaArray
{
    int* memoryHost;          // host buffer holding `size` ints
    int* memoryDevice;        // device buffer holding `size` ints; 0 if cudaMalloc failed
    int size;                 // number of int elements in each buffer
    cudaError_t errorStatus;  // first failing status returned by a CUDA call made by
                              // this object, or cudaSuccess if none has failed

    // Allocates a zero-filled host array of `datasize` ints and a device buffer
    // of the same size. The device buffer is NOT zeroed by cudaMalloc; call
    // Upload() to give it defined contents. If the device allocation fails,
    // memoryDevice is 0 and errorStatus holds the reason.
    CudaArray(int datasize)
        : memoryHost(0), memoryDevice(0), size(datasize), errorStatus(cudaSuccess)
    {
        memoryHost = new int[size];
        for (int i = 0; i < size; i++)
        {
            memoryHost[i] = 0;
        }
        record(cudaMalloc((void**)&memoryDevice, sizeof(int) * size));
        if (errorStatus != cudaSuccess)
        {
            // Never leave a pointer of unknown value behind after a failed allocation.
            memoryDevice = 0;
        }
    }

    // Releases the host array and the device buffer.
    ~CudaArray()
    {
        delete[] memoryHost;
        // cudaFree(0) performs no operation, so this is safe after a failed
        // allocation. Nothing useful can be done with a failure code here.
        (void)cudaFree(memoryDevice);
    }

    // Copies all `size` ints from memoryHost to memoryDevice (blocking copy).
    // A failure is stored in errorStatus.
    void Upload()
    {
        record(cudaMemcpy(memoryDevice, memoryHost, sizeof(int) * size, cudaMemcpyHostToDevice));
    }

    // Copies all `size` ints from memoryDevice to memoryHost (blocking copy).
    // A failure is stored in errorStatus.
    void Download()
    {
        record(cudaMemcpy(memoryHost, memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToHost));
    }

    void Insert(int* src);    // copy from src to memoryHost (defined elsewhere)
    void Retrieve(int* dest); // copy from memoryHost to dest (defined elsewhere)

private:
    // Remembers the first non-success status so later successful calls do not hide it.
    void record(cudaError_t status)
    {
        if (errorStatus == cudaSuccess)
        {
            errorStatus = status;
        }
    }

    // Declared but intentionally not defined: copying is not supported.
    CudaArray(const CudaArray&);
    CudaArray& operator=(const CudaArray&);
};
在结构体内部,一切正常。但是当我在外部使用 CudaArray 的对象时,指针出现了问题:
// Build the wrapper for 1000 ints; the CudaArray constructor shown above
// allocates the host array and calls cudaMalloc for the device buffer.
CudaArray cuda_ar(1000);
// Launch the kernel on the device pointer and the element count.
// `kernel`, `blocks` and `threads_per_block` are not defined in this snippet.
// NOTE(review): neither the allocation nor the launch is checked for errors
// here, so a failed cudaMalloc or a bad launch configuration goes unnoticed.
kernel <<<blocks, threads_per_block>>> (cuda_ar.memoryDevice, cuda_ar.size);
通过调试器,我读取了指针 memoryDevice 的值。在结构体内部(例如单步执行 Upload() 时),它是 0x01000000;但在启动内核的地方,memoryDevice 却指向 0x00000400(数值仅为示例)。
我知道memoryDevice是一个指向设备内存的指针。 有没有办法解释这种行为并解决我的问题?
答案 0 :(得分:1)
当我运行以下程序时
#include <cstdio>
// Owns two int buffers of the same length: one in host memory (memoryHost,
// allocated with new[]) and one in device memory (memoryDevice, allocated with
// cudaMalloc), and copies data between them on request.
//
// The object is the sole owner of both buffers, so copying is disabled: a
// member-wise copy would make two destructors release the same pointers.
// Pass CudaArray by reference or pointer.
struct CudaArray
{
    int* memoryHost;          // host buffer holding `size` ints
    int* memoryDevice;        // device buffer holding `size` ints; 0 if cudaMalloc failed
    int size;                 // number of int elements in each buffer
    cudaError_t errorStatus;  // first failing status returned by a CUDA call made by
                              // this object, or cudaSuccess if none has failed

    // Allocates a zero-filled host array of `datasize` ints and a device buffer
    // of the same size. The device buffer is NOT zeroed by cudaMalloc; call
    // Upload() to give it defined contents. If the device allocation fails, a
    // message is written to stderr, memoryDevice is 0 and errorStatus holds
    // the reason.
    CudaArray(int datasize)
        : memoryHost(0), memoryDevice(0), size(datasize), errorStatus(cudaSuccess)
    {
        memoryHost = new int[size];
        for (int i = 0; i < size; i++)
        {
            memoryHost[i] = 0;
        }
        if (!check(cudaMalloc((void**)&memoryDevice, sizeof(int) * size), "cudaMalloc"))
        {
            // Never leave a pointer of unknown value behind after a failed allocation.
            memoryDevice = 0;
        }
    }

    // Releases the host array and the device buffer.
    ~CudaArray()
    {
        delete[] memoryHost;
        // cudaFree(0) performs no operation, so this is safe after a failed allocation.
        check(cudaFree(memoryDevice), "cudaFree");
    }

    // Copies all `size` ints from memoryHost to memoryDevice (blocking copy).
    // A failure is reported on stderr and stored in errorStatus.
    void Upload()
    {
        check(cudaMemcpy(memoryDevice, memoryHost, sizeof(int) * size, cudaMemcpyHostToDevice),
              "cudaMemcpy (host to device)");
    }

    // Copies all `size` ints from memoryDevice to memoryHost (blocking copy).
    // A failure is reported on stderr and stored in errorStatus.
    void Download()
    {
        check(cudaMemcpy(memoryHost, memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");
    }

private:
    // Returns true when `status` is cudaSuccess. Otherwise prints the failing
    // call and the CUDA error text to stderr and remembers the first failure so
    // later successful calls do not hide it.
    bool check(cudaError_t status, const char* what)
    {
        if (status == cudaSuccess)
        {
            return true;
        }
        fprintf(stderr, "CudaArray: %s failed: %s\n", what, cudaGetErrorString(status));
        if (errorStatus == cudaSuccess)
        {
            errorStatus = status;
        }
        return false;
    }

    // Declared but intentionally not defined: copying is not supported.
    CudaArray(const CudaArray&);
    CudaArray& operator=(const CudaArray&);
};
// Diagnostic kernel: each launched thread prints the pointer value and the
// integer it received as arguments. The pointer is only printed, never
// dereferenced, so the output shows exactly what arrived on the device side.
__global__ void kernel(int *ptr, int n)
{
    const void *receivedAddress = ptr;
    const int receivedCount = n;
    printf("On Device : %p %d\n", receivedAddress, receivedCount);
}
// Prints the device pointer and element count as seen by the host, launches a
// single-thread kernel that prints the same two values as seen by the device,
// and verifies that every CUDA step succeeded.
// Returns 0 on success and 1 if allocation, launch or kernel execution failed.
int main(void)
{
    CudaArray cuda_ar(1000);

    // The CudaArray constructor calls cudaMalloc; a failure there is retrievable
    // through the runtime's last-error state.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CudaArray setup failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // %p expects a void pointer, hence the cast.
    printf("On Host : %p %d\n", (void*)cuda_ar.memoryDevice, cuda_ar.size);

    kernel<<<1, 1>>>(cuda_ar.memoryDevice, cuda_ar.size);

    // A launch does not return a status; configuration errors show up here.
    status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // Wait for the kernel to finish so that errors raised while it ran are
    // reported and the device-side printf output is flushed before exit.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}
我得到了
On Host : 0x200400000 1000
On Device : 0x200400000 1000
您应该确保 cudaMalloc、cudaMemcpy 等 CUDA 运行时调用以及内核启动都已成功返回。您可以在每次 CUDA 运行时调用之后加入以下代码进行验证:
// Place after a CUDA runtime call or kernel launch: cudaGetLastError() reads
// and clears the most recent error, and a message is printed if there was one.
if (cudaGetLastError() != cudaSuccess)
{
    printf("Error!\n");
}