Question

我对使用OpenCL进行编程非常陌生，英语不是我的母语，所以请原谅任何错误。

我想做的是使用OpenCV加载输入图像，并使用滤波器对图像执行卷积。计算是在GPU上通过OpenCL完成的。

我的做法是遍历图像，使用OpenCV的.at函数提取像素值，并将像素值写入一维数组。将这些数组传递到GPU设备时，程序会报堆栈溢出（Stack Overflow）错误。

欢迎任何意见和建议。先谢谢大家。

#define _CRT_SECURE_NO_WARNINGS

#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include <CL/cl.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>

#define PROGRAM_FILE "convolution.cl"
#define KERNEL_FUNC "convolution"

int main()
{
    //Read image to host
    cv::Mat Input;
    Input = cv::imread("simpsons.jpg", CV_LOAD_IMAGE_GRAYSCALE);

    //Rows and columns of input image
    int imageHeight, imageWidth, imageSize;
    imageHeight = Input.rows;   //768
    imageWidth = Input.cols;    //1024
    imageSize = imageHeight * imageWidth;   //1024*768

    //Extract Pixel values
    int Input_Array[1024 * 768];
    int Idx = 0;

    for (int i = 0; i < imageHeight; i++)
    {
        for (int j = 0; j < imageWidth; j++)
        {
            int pixel = Input.at<int>(i, j);
            Input_Array[Idx] = pixel;
            Idx++;
        }
    }

    //Create output array;
    int Output_Array[1024 * 768];

    //45 degree motion blur
    int filter[49] =
    {   0,      0,      0,      0,      0,      0,      0,
        0,      0,      0,      0,      0,      0,      0,
        0,      0,     -1,      0,      1,      0,      0,
        0,      0,     -2,      0,      2,      0,      0,
        0,      0,     -1,      0,      1,      0,      0,
        0,      0,      0,      0,      0,      0,      0,
        0,      0,      0,      0,      0,      0,      0 };

    //Convolution filter is 7x7
    int filterWidth = 7;
    int filterSize = filterWidth * filterWidth;     //Assuming square filter

    //Setup OpenCL Structure
    //Platform;
    int err;
    std::vector<cl::Platform> platform;
    cl::Platform::get(&platform);

    //Device
    std::vector<cl::Device> device;
    platform[0].getDevices(CL_DEVICE_TYPE_GPU, &device);

    //Context
    cl::Context context(device, NULL, NULL, NULL, NULL);

    //Command Queue
    cl::CommandQueue queue(context, device[0], 0);

    //Instead of Working with OpenCL images we use OpenCL Buffers instead
    //Create space for the 7x7 filter on the device
    cl::Buffer d_Filter(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filterSize * sizeof(cl_int), NULL, &err);

    //Create space for the input and output images on the device
    cl::Buffer d_Input(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filterSize * sizeof(cl_int), NULL, &err);
    cl::Buffer d_Output(context, CL_MEM_WRITE_ONLY, filterSize * sizeof(cl_int), NULL, &err);

    //Copy the filter buffer to device
    queue.enqueueWriteBuffer(d_Filter, CL_FALSE, 0, filterSize * sizeof(cl_int), filter, NULL, NULL);

    //Copy image buffer to device
    queue.enqueueWriteBuffer(d_Input, CL_FALSE, 0, imageSize * sizeof(cl_int), Input_Array, NULL, NULL);

    //Program
    std::ifstream programFile(PROGRAM_FILE);
    std::string programString(std::istreambuf_iterator<char>(programFile), (std::istreambuf_iterator<char>()));
    cl::Program::Sources source(1, std::make_pair(programString.c_str(), programString.length() + 1));
    cl::Program program(context, source, &err);
    err = program.build(device);

    //Create Kernel
    cl::Kernel kernel(program, KERNEL_FUNC, &err);

    //Set Kernel Arguments
    kernel.setArg(0, &d_Input);
    kernel.setArg(1, &d_Output);
    kernel.setArg(2, &imageHeight);
    kernel.setArg(3, &imageWidth);
    kernel.setArg(4, &d_Filter);
    kernel.setArg(5, &filterWidth);

    //Execute Kernel
    cl::NDRange offset(0, 0);
    cl::NDRange global_size(imageHeight, imageWidth);       //1.height - 2.width
    cl::NDRange local_size(1, 1);
    queue.enqueueNDRangeKernel(kernel,
        offset,
        global_size,
        local_size);

    //Read back the Output
    queue.enqueueReadBuffer(d_Output, CL_TRUE, 0, imageSize * sizeof(cl_int), Output_Array, NULL, NULL);

    //Convert 1D_Output to 2D_Output
    int Output_2DArray[768][1024];
    Idx = 0;
    for (int i = 0; i < imageHeight; i++)
    {
        for (int j = 0; j < imageWidth; j++)
        {
            Output_2DArray[i][j] = Input_Array[Idx];
            Idx++;
        }
    }

    //Create mat from Output_2DArray
    cv::Mat Output(768, 1024, CV_8U, Output_2DArray);

    cv::namedWindow("Result", CV_WINDOW_AUTOSIZE);

    cv::imshow("Output", Output);

    cv::waitKey();

}


Kernel Code:
// Convolves a row-major, single-channel integer image with a square integer
// filter. One work-item produces one output pixel.
//
//   InputImage   h*w input pixels, row-major
//   OutputImage  h*w output pixels, row-major
//   h, w         image height and width in pixels
//   filter       filterWidth*filterWidth coefficients, row-major
//   filterWidth  side length of the square filter
//
// Pixels closer than filterWidth/2 to any image edge are written as 0.
__kernel void convolution(
   __global int* InputImage,
   __global int* OutputImage,
   int h,
   int w,
   __constant int* filter,
   int filterWidth)
{
   // Dimension 0 of the NDRange is the column, dimension 1 is the row
   int column = get_global_id(0);
   int row    = get_global_id(1);

   // Work-items outside the image must not touch memory at all
   if (row >= h || column >= w) {
        return;
   }

   int halfWidth = filterWidth/2;

   // Ignore Border Pixels. The upper bounds use >= because the last valid
   // index is h-1 (w-1); with > the window would read one row/column past
   // the end of the image.
   if (row - halfWidth < 0 || row + halfWidth >= h || column - halfWidth < 0 || column + halfWidth >= w) {
        OutputImage[row*w + column] = 0     /*InputImage[row*w + column]*/;
        return;
   }

   // Iterator for the filter; advances in the same row-major order as the
   // nested loops below
   int filterIdx = 0;
   int summ = 0;

   //Iterate over rows
   for (int j = -halfWidth; j <= halfWidth; j++)
   {
      //Iterate over columns
      for (int i = -halfWidth; i <= halfWidth; i++)
      {
         summ += InputImage[(row + j)*w + column + i]*filter[filterIdx++];
      }
   }

   OutputImage[row*w + column] = summ;
}

OpenCL图像卷积不起作用（堆栈溢出）

0 个答案: