我遇到以下代码的问题。以下代码采用输入图像,它应该保存它的灰度。不幸的是,它似乎执行预期的行为,但它只处理图像的一部分而不是整体。似乎问题发生在从设备到主机的cudamemcpy中。
我相信在我在Cuda分配内存时可能会遇到一些问题。
__global__ void rgb2grayCudaKernel(unsigned char *inputImage, unsigned char *grayImage, const int width, const int height)
{
int ty = (blockIdx.x * blockDim.x) + threadIdx.x;
//int tx = (blockIdx.x * blockDim.x) + threadIdx.x;
int tx = (blockIdx.y * blockDim.y) + threadIdx.y;
if( (ty < height && tx<width) )
{
float grayPix = 0.0f;
float r = static_cast< float >(inputImage[(ty * width) + tx]);
float g = static_cast< float >(inputImage[(width * height) + (ty * width) + tx]);
float b = static_cast< float >(inputImage[(2 * width * height) + (ty * width) + tx]);
grayPix = (0.3f * r) + (0.59f * g) + (0.11f * b);
grayImage[(ty * width) + tx] = static_cast< unsigned char >(grayPix);
}
}
//***************************************rgb2gray function, call of kernel in here *************************************
void rgb2grayCuda(unsigned char *inputImage, unsigned char *grayImage, const int width, const int height)
{
unsigned char *inputImage_c, *grayImage_c;
const int sizee= (width*height);
// **********memory allocation for pointers and cuda******************
cudaMalloc((void **) &inputImage_c, sizee);
checkCudaError("im not alloc!");
cudaMalloc((void **) &grayImage_c, sizee);
checkCudaError("gray not alloc !");
//***********copy to device*************************
cudaMemcpy(inputImage_c, inputImage, sizee*sizeof(unsigned char), cudaMemcpyHostToDevice);
checkCudaError("im not send !");
cudaMemcpy(grayImage_c, grayImage, sizee*sizeof(unsigned char), cudaMemcpyHostToDevice);
checkCudaError("gray not send !");
dim3 thrb(32,32);
dim3 numb (ceil(width*height/1024));
//**************Execute Kernel (Timer in here)**************************
NSTimer kernelTime = NSTimer("kernelTime", false, false);
kernelTime.start();
rgb2grayCudaKernel<<<numb,1024>>> (inputImage_c, grayImage_c, width, height);
checkCudaError("kernel!");
kernelTime.stop();
//**************copy back to host*************************
printf("/c");
cudaMemcpy(grayImage, grayImage_c, sizee*sizeof(unsigned char), cudaMemcpyDeviceToHost);
checkCudaError("Receiving data from CPU failed!");
//*********************free memory***************************
cudaFree(inputImage_c);
cudaFree(grayImage_c);
//**********************print time****************
cout << fixed << setprecision(6);
cout << "rgb2gray (cpu): \t\t" << kernelTime.getElapsed() << " seconds." << endl;
}
答案 0 :(得分:1)
const int sizee= (width*height);
应该是:
const int sizee= (width*height*3);
表示rgb数据(每个通道1个字节)。
我相信位图图像,颜色应该交错,如下所示:
rgb of pixel1, rgb of pixel 2 ... rgb of pixel width*height
因此你的内核应该是:
__global__ void rgb2grayCudaKernel(unsigned char *inputImage, unsigned char *grayImage, const int width, const int height)
{
int tx = (blockIdx.y * blockDim.y) + threadIdx.y;
int ty = (blockIdx.x * blockDim.x) + threadIdx.x;
if( (ty < height && tx<width) )
{
unsigned int pixel = ty*width+tx;
float grayPix = 0.0f;
float r = static_cast< float >(inputImage[pixel*3]);
float g = static_cast< float >(inputImage[pixel*3+1]);
float b = static_cast< float >(inputImage[pixel*3+2]);
grayPix = (0.3f * r) + (0.59f * g) + (0.11f * b);
grayImage[pixel] = static_cast< unsigned char >(grayPix);
}
}
另外,从我看到的光度计算为0.21 R + 0.72 G + 0.07 B。