如何使用CUDA操作16位图像?

时间:2018-03-16 10:11:26

标签: c++ opencv cuda

我正在尝试在CUDA中操作一个16位图像,但我无法让它工作。 我正在使用opencv进行捕获,然后我想将帧发送到cuda一对一复制(后来我添加了我的过滤器)并使用imshow显示它。

这是我的主要内容:

int main (int argc, char** argv)
{
    // Open a webcamera
    cv::VideoCapture camera(0);
    cv::Mat frame;
    cv::Mat gray;
    if(!camera.isOpened()) 
        return -1;

    camera.set(CV_CAP_PROP_FRAME_WIDTH,640);
    camera.set(CV_CAP_PROP_FRAME_HEIGHT,480);

    cv::namedWindow("Greyscale");
    cv::namedWindow("CUDA");

    cv::Mat blurred = cv::Mat(480,640,CV_16UC1);
    cv::Mat source;
    uint16_t *sourceDataDevice, *destinationDataDevice; 


    CudaSafeCall(cudaMalloc( (void**) &sourceDataDevice, 640*480*sizeof(uint16_t)));
    CudaSafeCall(cudaMalloc( (void**) &destinationDataDevice, 640*480*sizeof(uint16_t)));

    while(1)
    {
        camera >> frame;
        cv::cvtColor(frame, source, CV_BGR2GRAY);
        source.convertTo(source, CV_16UC1);

        CudaSafeCall(cudaMemcpy( sourceDataDevice, source.data, 640*480*sizeof(uint16_t), cudaMemcpyHostToDevice ));

        int w = source.size().width;
        int h = source.size().height;

        dim3 blocks (w/16 + 1, h/16 + 1);
        dim3 threads(16, 16);


        convolve<<<blocks,threads>>>(sourceDataDevice, w, h, destinationDataDevice);
        cudaThreadSynchronize();
        CudaCheckError();

        CudaSafeCall(cudaMemcpy( blurred.data, destinationDataDevice, 640*480*sizeof(uint16_t), cudaMemcpyDeviceToHost ));

        source.convertTo(source, CV_8U);
        cv::imshow("Greyscale", source);
        blurred.convertTo(blurred, CV_8U);

        cv::imshow("Blurred", blurred);

        if(cv::waitKey(1) == 27) break;
    }

    CudaSafeCall( cudaFree(destinationDataDevice) );
    CudaSafeCall( cudaFree(sourceDataDevice) );

    return 0;
}

这是我的内核:

__global__ void convolve(uint16_t *source, int width, int height, uint16_t *destination)
{
    // Calculate our pixel's location
    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
    // Only execute for valid pixels

    if(x < width * height)
    {
        int index = (y * width) + x;
        destination[index] = source[index];
    }
}

输出是覆盖输入的1/4的图像。

有人能指出我正确的方向吗?

0 个答案:

没有答案