在类中包装cuda内存处理会导致内存地址损坏

时间:2012-01-19 14:50:18

标签: c++ cuda

为了更方便地管理主机和设备上的内存,我创建了下面这个类。从理论上讲,它应该负责在主机与设备之间双向复制数据。

// Owns two int buffers of equal length, one in host memory and one in device
// memory, and provides helpers that copy the whole buffer in either direction.
//
// The result of the most recent CUDA runtime call made by this object is kept
// in `status`, so callers can detect a failed allocation or copy instead of
// silently continuing with an unusable device pointer.
//
// Objects are not copyable: a copy would hold the same two pointers and both
// destructors would release them.
struct CudaArray
{
    int* memoryHost;     // host buffer of `size` ints, zero-filled at construction
    int* memoryDevice;   // device buffer of `size` ints; null if the allocation failed
    int size;            // element count of each buffer (never negative)
    cudaError_t status;  // result of the last CUDA runtime call made by this object

    // Creates the zero-filled host array and allocates the device buffer with
    // cudaMalloc. A negative `datasize` is treated as 0. On allocation failure
    // `memoryDevice` is null and `status` holds the error.
    CudaArray(int datasize)
        : memoryHost(0),
          memoryDevice(0),
          size(datasize > 0 ? datasize : 0),
          status(cudaSuccess)
    {
        // The trailing () value-initialises the array, i.e. fills it with zeros.
        memoryHost = new int[size]();

        status = cudaMalloc((void**)&memoryDevice, sizeof(int) * size);
        if (status != cudaSuccess)
        {
            // Do not keep whatever value the failed call may have left behind.
            memoryDevice = 0;
        }
    }

    // Frees the host array and the device buffer.
    ~CudaArray()
    {
        delete[] memoryHost;
        cudaFree(memoryDevice);  // a null pointer is accepted and ignored
    }

    // Copies all `size` ints from the host array to the device buffer.
    // The outcome is stored in `status`.
    void Upload()
    {
        status = cudaMemcpy(memoryDevice, memoryHost, sizeof(int) * size, cudaMemcpyHostToDevice);
    }

    // Copies all `size` ints from the device buffer to the host array.
    // The outcome is stored in `status`.
    void Download()
    {
        status = cudaMemcpy(memoryHost, memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToHost);
    }

    void Insert(int* src);     // copy from src to memoryHost
    void Retrieve(int* dest);  // copy from memoryHost to dest

private:
    // Declared but intentionally not defined: copying would lead to both
    // buffers being freed twice.
    CudaArray(const CudaArray&);
    CudaArray& operator=(const CudaArray&);
};

在内部,一切都很好。但是当我使用我的CudaArray的一个对象时,指针存在问题:

// Build the wrapper for 1000 ints; its constructor zero-fills the host array
// and allocates the device buffer with cudaMalloc.
CudaArray cuda_ar(1000);
// Launch the kernel with the raw device pointer and the element count.
// NOTE(review): Upload() is not called in this snippet, so nothing shown here
// copies the host data to the device before the launch — confirm this is intended.
kernel <<<blocks, threads_per_block>>> (cuda_ar.memoryDevice, cuda_ar.size);

通过调试器,我读取了指针memoryDevice的值。在结构体内部(例如单步执行Upload()时),它是0x01000000;但在启动内核的地方,memoryDevice却指向0x00000400(这些数值仅为示例)。

我知道memoryDevice是一个指向设备内存的指针。有没有办法解释这种行为并解决我的问题?

1 个答案:

答案 0 :(得分:1)

当我运行以下程序时

#include <cstdio>
// Pairs a host int array with a device int buffer of the same length and
// offers whole-buffer copies in both directions.
//
// `status` always holds the result of the last CUDA runtime call this object
// made, so a failed cudaMalloc or cudaMemcpy can be detected by the caller.
//
// Copying is disabled because two objects sharing the same pointers would
// each free them on destruction.
struct CudaArray
{
  int* memoryHost;     // host array of `size` ints, zero-filled at construction
  int* memoryDevice;   // device buffer of `size` ints; null if allocation failed
  int size;            // number of elements in each buffer (never negative)
  cudaError_t status;  // result of the last CUDA runtime call made by this object

  // Creates the zero-filled host array and allocates the device buffer with
  // cudaMalloc. A negative `datasize` is treated as 0. If the allocation
  // fails, `memoryDevice` is null and `status` holds the error.
  CudaArray(int datasize)
    : memoryHost(0),
      memoryDevice(0),
      size(datasize > 0 ? datasize : 0),
      status(cudaSuccess)
  {
    // The trailing () value-initialises every element to zero.
    memoryHost = new int[size]();

    status = cudaMalloc((void**)&memoryDevice, sizeof(int) * size);
    if (status != cudaSuccess)
    {
      // Never expose a pointer left over from a failed allocation.
      memoryDevice = 0;
    }
  }

  // Frees memory on host and device.
  ~CudaArray()
  {
    delete[] memoryHost;
    cudaFree(memoryDevice);  // a null pointer is accepted and ignored
  }

  // Uploads all `size` ints from host to device; outcome is stored in `status`.
  void Upload()
  {
    status = cudaMemcpy(memoryDevice, memoryHost, sizeof(int) * size, cudaMemcpyHostToDevice);
  }

  // Downloads all `size` ints from device to host; outcome is stored in `status`.
  void Download()
  {
    status = cudaMemcpy(memoryHost, memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToHost);
  }

private:
  // Declared but intentionally not defined: copies would double-free the buffers.
  CudaArray(const CudaArray&);
  CudaArray& operator=(const CudaArray&);
};

// Debug kernel: each launched thread prints the pointer value and the count
// it received as arguments, so they can be compared with the host-side values.
__global__ void kernel(int *ptr, int n)
{
  const void *receivedAddress = ptr;  // %p takes a void pointer
  printf("On Device : %p %d\n", receivedAddress, n);
}

// Prints the device pointer held by a CudaArray on the host, then launches a
// single-thread kernel that prints the pointer it received, so the two values
// can be compared.
//
// Returns 0 on success, 1 if the kernel launch or its execution reported a
// CUDA error.
int main(void)
{
  CudaArray cuda_ar(1000);
  // %p expects a void pointer argument.
  printf("On Host   : %p %d\n", (void*)cuda_ar.memoryDevice, cuda_ar.size);

  kernel<<<1, 1>>>(cuda_ar.memoryDevice, cuda_ar.size);

  // A kernel launch returns nothing; launch failures are reported through
  // cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if (err == cudaSuccess)
  {
    // Wait for the kernel to finish: this surfaces execution errors and
    // flushes the device-side printf buffer before the program exits.
    err = cudaDeviceSynchronize();
  }

  if (err != cudaSuccess)
  {
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
    return 1;
  }
  return 0;
}

我得到了

On Host   : 0x200400000 1000
On Device : 0x200400000 1000

您应该确保cudaMalloc、cudaMemcpy等CUDA运行时调用以及内核启动都已成功返回。可以在每个CUDA运行时调用之后加入以下代码进行验证:

// Report a failure if the CUDA runtime has an error status pending.
if (cudaGetLastError() != cudaSuccess)
{
  printf("Error!\n");
}