我正在尝试在CUDA中操作一个16位图像,但我无法让它工作。 我正在使用opencv进行捕获,然后我想将帧发送到cuda一对一复制(后来我添加了我的过滤器)并使用imshow显示它。
这是我的主要内容:
int main (int argc, char** argv)
{
// Open a webcamera
cv::VideoCapture camera(0);
cv::Mat frame;
cv::Mat gray;
if(!camera.isOpened())
return -1;
camera.set(CV_CAP_PROP_FRAME_WIDTH,640);
camera.set(CV_CAP_PROP_FRAME_HEIGHT,480);
cv::namedWindow("Greyscale");
cv::namedWindow("CUDA");
cv::Mat blurred = cv::Mat(480,640,CV_16UC1);
cv::Mat source;
uint16_t *sourceDataDevice, *destinationDataDevice;
CudaSafeCall(cudaMalloc( (void**) &sourceDataDevice, 640*480*sizeof(uint16_t)));
CudaSafeCall(cudaMalloc( (void**) &destinationDataDevice, 640*480*sizeof(uint16_t)));
while(1)
{
camera >> frame;
cv::cvtColor(frame, source, CV_BGR2GRAY);
source.convertTo(source, CV_16UC1);
CudaSafeCall(cudaMemcpy( sourceDataDevice, source.data, 640*480*sizeof(uint16_t), cudaMemcpyHostToDevice ));
int w = source.size().width;
int h = source.size().height;
dim3 blocks (w/16 + 1, h/16 + 1);
dim3 threads(16, 16);
convolve<<<blocks,threads>>>(sourceDataDevice, w, h, destinationDataDevice);
cudaThreadSynchronize();
CudaCheckError();
CudaSafeCall(cudaMemcpy( blurred.data, destinationDataDevice, 640*480*sizeof(uint16_t), cudaMemcpyDeviceToHost ));
source.convertTo(source, CV_8U);
cv::imshow("Greyscale", source);
blurred.convertTo(blurred, CV_8U);
cv::imshow("Blurred", blurred);
if(cv::waitKey(1) == 27) break;
}
CudaSafeCall( cudaFree(destinationDataDevice) );
CudaSafeCall( cudaFree(sourceDataDevice) );
return 0;
}
这是我的内核:
__global__ void convolve(uint16_t *source, int width, int height, uint16_t *destination)
{
// Calculate our pixel's location
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
// Only execute for valid pixels
if(x < width * height)
{
int index = (y * width) + x;
destination[index] = source[index];
}
}
输出是覆盖输入的1/4的图像。
有人能指出我正确的方向吗?