我是CUDA的新手。 我想将一叠图像复制到设备上。
// Question snippet: tries to build a device-side table of stackSize image
// pointers and then allocate and upload one image per table slot.
// NOTE(review): this is the code the answer below analyses; it is kept as
// posted. imageSize, _imageStack and dummy are not declared in this snippet.
unsigned char** _devStackImagesCuda = NULL;
int stackSize = 5;//should be replaced by argument to the function
// NOTE(review): the doubled backslash after the condition on the next line is
// presumably a typo for a line comment; as written it is not a comment.
if(_devStackImagesCuda == NULL)\\allocate array of pointers on the device
{
// Allocate the table of stackSize pointers with cudaMalloc, then zero it with
// cudaMemset so that every slot starts out as a null pointer.
cudaMalloc(&_devStackImagesCuda, sizeof(unsigned char*) * stackSize);
cudaMemset(_devStackImagesCuda, 0, sizeof(unsigned char*) * stackSize);
}
for(int i = 0; i < stackSize; i++)
{
// NOTE(review): after the cudaMalloc above, _devStackImagesCuda holds a device
// address. Indexing it here (_devStackImagesCuda[i]) dereferences that address
// from host code, which is not allowed; the same applies to the
// &_devStackImagesCuda[i] and _devStackImagesCuda[i] arguments below.
if(_devStackImagesCuda[i] == NULL) //allocates one image on the device.
cudaMalloc(&_devStackImagesCuda[i], imageSize * sizeof(unsigned char));
cudaMemcpy(_devStackImagesCuda[i], _imageStack->GetImage(i, dummy, true), imageSize, cudaMemcpyHostToDevice);//copy image data to device
}
这种做法可行吗?
答案 0 :(得分:2)
正如评论中所指出的,您的方法存在一些问题。请看这一行:
if(_devStackImagesCuda[i] == NULL)
在上面这一行中,您试图检查 _devStackImagesCuda[i] 是否有效,但要做到这一点,就必须解引用 _devStackImagesCuda。
但是,您之前已经对这个指针(一个指向指针的指针)调用过 cudaMalloc,所以它现在是设备指针,不允许在主机端解引用。我建议您改用其他方式来记录是否需要对这些指针调用 cudaMalloc。我相信类似下面的代码可以正常工作:
#include <stdio.h>
/*
 * cudaCheckErrors(msg)
 *
 * Reads the CUDA runtime's last-error state with cudaGetLastError(). If it is
 * anything other than cudaSuccess, prints msg, the runtime's error string and
 * the file/line of the macro invocation to stderr, then terminates the
 * process with exit status 1.
 *
 * msg: text identifying the call site; it is printed with a "%s" conversion.
 *
 * Intended to be placed directly after the CUDA call it guards. The
 * do { ... } while (0) wrapper makes the expansion behave as one statement.
 * The local variable deliberately avoids a double-underscore name, because
 * such identifiers are reserved for the implementation in C++.
 * Comments are kept outside the macro body so that they cannot interfere
 * with the backslash line continuations.
 */
#define cudaCheckErrors(msg) \
    do { \
        cudaError_t cudaCheckErrors_err_ = cudaGetLastError(); \
        if (cudaCheckErrors_err_ != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                    (msg), cudaGetErrorString(cudaCheckErrors_err_), \
                    __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)
/*
 * Demonstrates how to build a device-resident array of per-image device
 * pointers without ever dereferencing a device pointer on the host.
 *
 * The per-image device addresses are kept in a host array (temp). Each image
 * buffer is allocated and filled through that host copy, and the finished
 * pointer table is then uploaded into _devStackImagesCuda in one transfer.
 *
 * Returns 0 on success. Any CUDA error is reported by cudaCheckErrors, which
 * terminates the process.
 */
int main(){
    unsigned char **_devStackImagesCuda = 0;

    // Constants (not plain ints) so the host arrays below are ordinary
    // fixed-size arrays rather than compiler-extension variable-length arrays.
    const int stackSize = 5;
    const int imageSize = 4;

    // Host-side copies of the device pointers; this is what the host is
    // allowed to read, index and pass to cudaMalloc/cudaMemcpy/cudaFree.
    unsigned char *temp[stackSize] = {0};
    // Placeholder image data, zero-initialised so the upload is well defined.
    unsigned char dummy_image[imageSize] = {0};

    // First create the top-level pointer table on the device.
    if (_devStackImagesCuda == 0)
    {
        cudaMalloc(&_devStackImagesCuda, sizeof(unsigned char*) * stackSize);
        cudaCheckErrors("cm 1");
    }

    // Then allocate each image buffer (address stored on the host) and copy
    // the image data into it.
    for (int i = 0; i < stackSize; i++)
    {
        cudaMalloc(&temp[i], imageSize * sizeof(unsigned char));
        cudaCheckErrors("cm 2");
        cudaMemcpy(temp[i], dummy_image, imageSize * sizeof(unsigned char), cudaMemcpyHostToDevice); // copy image to device
        cudaCheckErrors("cudamemcpy2");
    }

    // Upload the whole table of child pointers in a single transfer instead
    // of issuing one small copy per element.
    cudaMemcpy(_devStackImagesCuda, temp, sizeof(unsigned char *) * stackSize, cudaMemcpyHostToDevice);
    cudaCheckErrors("cudamemcopy1");

    // Release the image buffers through the host-side pointer copies, then
    // release the device pointer table itself.
    for (int i = 0; i < stackSize; i++)
    {
        cudaFree(temp[i]);
        cudaCheckErrors("cudafree image");
    }
    cudaFree(_devStackImagesCuda);
    cudaCheckErrors("cudafree stack");

    return 0;
}
顺便说一句,如果您可以把整叠图像当作一块连续的内存区域来处理,事情会简单一些。例如:
// Host buffer holding NUM_IMAGES images of IMAGE_SIZE bytes each, stored back
// to back, plus a single device allocation of the same total size.
unsigned char images[NUM_IMAGES*IMAGE_SIZE]; // or you could malloc this
unsigned char *d_images = 0;
// cudaMalloc stores the new device address through its first argument, so it
// must be given the address of d_images, not the (unset) value of d_images.
cudaMalloc((void **) &d_images, NUM_IMAGES*IMAGE_SIZE*sizeof(unsigned char));
// One transfer moves the whole stack of images to the device.
cudaMemcpy(d_images, images, NUM_IMAGES*IMAGE_SIZE*sizeof(unsigned char), cudaMemcpyHostToDevice);
并通过以下方式访问单个图像元素:
// Element i of image j: skip j whole images of IMAGE_SIZE bytes, then step i bytes in.
unsigned char mypixel = images[(IMAGE_SIZE * j) + i];