是否可以(深度)复制设备上所有动态分配的内存(以指针数组的形式组织)?

时间:2015-09-06 00:18:56

标签: cuda dynamic-memory-allocation

下面的代码大致是我想做的事情:

// Each thread allocates its own buffer with in-kernel malloc() and publishes
// the resulting pointer in d_array[threadIdx.x].
//
// Named mallocTest to match the launch site in main(); the previous name,
// `malloc`, collided with the malloc() called in the body and on the host.
//
// Launch layout: indexes by threadIdx.x only, so it expects a single block
// and d_array must have room for at least blockDim.x pointers.
__global__ void mallocTest(int **d_array)
{
    // x is different within each thread and cannot be determined at
    // compile time (placeholder: x is not defined in this snippet).
    int* ptr = (int*)malloc(x);

    // then fill that space with something and put it in the d_array
    // NOTE(review): in-kernel malloc() presumably returns NULL when the
    // device heap is exhausted -- check ptr before filling it.

    d_array[threadIdx.x] = ptr;
}
// Host driver: sizes the device malloc heap, allocates the device-side table
// of per-thread pointers, launches mallocTest with one block of blockSize
// threads, then attempts to bring the per-thread data back to the host.
//
// somethingBigEnough, maxSize and cudaDeepMemcpy are placeholders that are
// not defined in this snippet.
int main()
{
    int **d_array = NULL; int *h_array = NULL;
    const int blockSize = 32;

    // cudaDeviceSetLimit / cudaDeviceSynchronize replace the deprecated
    // cudaThreadSetLimit / cudaThreadSynchronize.
    cudaDeviceSetLimit(cudaLimitMallocHeapSize, somethingBigEnough);

    // Pass the ADDRESS of d_array so cudaMalloc can store the allocation in
    // it; one pointer slot per launched thread.
    cudaMalloc((void **)&d_array, sizeof(int *) * blockSize);

    mallocTest<<<1, blockSize>>>(d_array);
    cudaDeviceSynchronize();

    //here is what I want:
    h_array = (int *)malloc(sizeof(int) * maxSize);
    // Hypothetical API -- this is the "deep copy" the question asks about.
    cudaDeepMemcpy(h_array, d_array, cudaMemcpyDeviceToHost);

    free(h_array);
    cudaFree(d_array);
    return 0;
}

我正在寻找 CUDA 中类似于 "deepMemcpy" 的功能。如果不支持,我可以用什么方法将运行时生成的数据复制回主机?

0 个答案:

没有答案