Question

我最近开始使用openCL.Net并遇到了一些图像处理代码的问题。我要做的是针对图像中的特定颜色并将其更改为不同的RGB值，以减少图像中的颜色总数。此过程的工作流程如下：

识别图像中的所有颜色。
识别要删除的颜色。
将该颜色舍入到图像中下一个最接近的颜色。
查找图像中与该颜色匹配的所有像素，并将其更改为圆角颜色。
重复上述步骤，直到达到色数阈值。

我所有这些代码都在C＃中运行，但现在我想在gpu上完成像素更改，因为连续多次运行250000像素可能需要几分钟。

using OpenCL.Net;
private Context _context;
private Device _device;

   public void SetupOpenCL()
    {
        ErrorCode error;
        Platform[] platforms = Cl.GetPlatformIDs(out error);
        List<Device> devicesList = new List<Device>();

        CheckErr(error, "Cl.GetPlatformIDs");

        foreach (Platform platform in platforms)
        {
            string platformName = Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString();
            Console.WriteLine("Platform: " + platformName);
            CheckErr(error, "Cl.GetPlatformInfo");
            //We will be looking only for GPU devices
            foreach (Device device in Cl.GetDeviceIDs(platform, DeviceType.Gpu, out error))
            {
                CheckErr(error, "Cl.GetDeviceIDs");
                //Console.WriteLine("Device: " + device.ToString());
                devicesList.Add(device);
            }
        }

        if (devicesList.Count <= 0)
        {
            //Console.WriteLine("No devices found.");
            //Set bool used to signify that code processed on CPU only
            return;
        }

        _device = devicesList[0];

        if (Cl.GetDeviceInfo(_device, DeviceInfo.ImageSupport, out error).CastTo<Bool>() == Bool.False)
        {
            //Console.WriteLine("No image support.");
            //Set bool used to signify that code processed on CPU only
            return;
        }
        _context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out error);    //Second parameter is amount of devices
        CheckErr(error, "Cl.CreateContext");
    }

通过Visual Studio，我可以看到我的电脑配备了Intel（R）Core（TM）i7-4500U CPU @ 1.80GHz和Intel（R）HD Graphics 4400（GPU）。设置我的内核的代码如下：

public Bitmap ImagingTest2(Bitmap inputBM, int[] oldRGB, int[] newRGB)
    {
        ErrorCode error;
        //Load and compile kernel source code.
        string programPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "\\ImagingTest2.cl";
        //The path to the source file may vary

        if (!System.IO.File.Exists(programPath))
        {
            Console.WriteLine("Program doesn't exist at path " + programPath);
            return inputBM;
        }

        string programSource = System.IO.File.ReadAllText(programPath);

        using (OpenCL.Net.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
        {
            CheckErr(error, "Cl.CreateProgramWithSource");
            //Compile kernel source
            error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
            CheckErr(error, "Cl.BuildProgram");
            //Check for any compilation errors
            if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>()
                != BuildStatus.Success)
            {
                CheckErr(error, "Cl.GetProgramBuildInfo");
                Console.WriteLine("Cl.GetProgramBuildInfo != Success");
                Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
                return inputBM;
            }
            //Create the required kernel (entry function)
            Kernel kernel = Cl.CreateKernel(program, "imagingTest", out error);
            CheckErr(error, "Cl.CreateKernel");

            int intPtrSize = 0;
            intPtrSize = Marshal.SizeOf(typeof(IntPtr));
            //Image's RGBA data converted to an unmanaged[] array
            byte[] inputByteArray;
            //OpenCL memory buffer that will keep our image's byte[] data.
            Mem inputImage2DBuffer;
            OpenCL.Net.ImageFormat clImageFormat = new OpenCL.Net.ImageFormat(ChannelOrder.RGBA, ChannelType.Unsigned_Int8);
            int inputImgWidth, inputImgHeight;

            int inputImgBytesSize;
            int inputImgStride;
            inputImgWidth = inputBM.Width;
            inputImgHeight = inputBM.Height;
            //Get raw pixel data of the bitmap
            //The format should match the format of clImageFormat
            BitmapData bitmapData = inputBM.LockBits(new Rectangle(0, 0, inputBM.Width, inputBM.Height),
                          ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);//inputImage.PixelFormat);
            inputImgStride = bitmapData.Stride;
            inputImgBytesSize = bitmapData.Stride * bitmapData.Height;

            //Copy the raw bitmap data to an unmanaged byte[] array
            inputByteArray = new byte[inputImgBytesSize];
            Marshal.Copy(bitmapData.Scan0, inputByteArray, 0, inputImgBytesSize);
            //Allocate OpenCL image memory buffer
            inputImage2DBuffer = (Mem)Cl.CreateImage2D(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, clImageFormat,
                                                (IntPtr)bitmapData.Width, (IntPtr)bitmapData.Height,
                                                (IntPtr)0, inputByteArray, out error);
            CheckErr(error, "Cl.CreateImage2D input");

            //Unmanaged output image's raw RGBA byte[] array
            byte[] outputByteArray = new byte[inputImgBytesSize];
            //Allocate OpenCL image memory buffer
            Mem outputImage2DBuffer = (Mem)Cl.CreateImage2D(_context, MemFlags.CopyHostPtr |
                MemFlags.WriteOnly, clImageFormat, (IntPtr)inputImgWidth,
                (IntPtr)inputImgHeight, (IntPtr)0, outputByteArray, out error);
            CheckErr(error, "Cl.CreateImage2D output");
            //Pass the memory buffers to our kernel function
            error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, inputImage2DBuffer);
            error |= Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, outputImage2DBuffer);
            error = Cl.SetKernelArg(kernel, 2, (IntPtr)(sizeof(int) * 4), oldRGB);
            error = Cl.SetKernelArg(kernel, 3, (IntPtr)(sizeof(int) * 4), newRGB);
            CheckErr(error, "Cl.SetKernelArg");

            //Create a command queue, where all of the commands for execution will be added
            CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
            CheckErr(error, "Cl.CreateCommandQueue");
            Event clevent;
            //Copy input image from the host to the GPU.
            IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 };    //x, y, z
            IntPtr[] regionPtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 };    //x, y, z
            IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 };
            error = Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, Bool.True,
               originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputByteArray, 0, null, out clevent);
            CheckErr(error, "Cl.EnqueueWriteImage");
            //Execute our kernel (OpenCL code)
            error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, workGroupSizePtr, null, 0, null, out clevent);
            CheckErr(error, "Cl.EnqueueNDRangeKernel");
            //Wait for completion of all calculations on the GPU.
            error = Cl.Finish(cmdQueue);
            CheckErr(error, "Cl.Finish");
            //Read the processed image from GPU to raw RGBA data byte[] array
            error = Cl.EnqueueReadImage(cmdQueue, outputImage2DBuffer, Bool.True, originPtr, regionPtr,
                                        (IntPtr)0, (IntPtr)0, outputByteArray, 0, null, out clevent);
            CheckErr(error, "Cl.clEnqueueReadImage");
            //Clean up memory
            Cl.ReleaseKernel(kernel);
            Cl.ReleaseCommandQueue(cmdQueue);

            Cl.ReleaseMemObject(inputImage2DBuffer);
            Cl.ReleaseMemObject(outputImage2DBuffer);
            //Get a pointer to our unmanaged output byte[] array
            GCHandle pinnedOutputArray = GCHandle.Alloc(outputByteArray, GCHandleType.Pinned);
            IntPtr outputBmpPointer = pinnedOutputArray.AddrOfPinnedObject();
            //Create a new bitmap with processed data and save it to a file.
            Bitmap outputBitmap = new Bitmap(inputImgWidth, inputImgHeight,
                  inputImgStride, PixelFormat.Format32bppArgb, outputBmpPointer);
            inputBM.UnlockBits(bitmapData);
            pinnedOutputArray.Free();
            return outputBitmap;
        }
    }

其中inputBM是一个已知的Bitmap，我有每个像素的RGB数据。 oldRGB是我想要更改的像素的RGB值。 newRGB是我想要将像素更改为的RGB值。

最后，这是内核本身的代码：

__kernel void imagingTest(__read_only  image2d_t srcImg,__write_only image2d_t dstImg, int4 oldRGB, int4 newRGB)
{
  const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
  CLK_ADDRESS_CLAMP_TO_EDGE |
  CLK_FILTER_NEAREST;

  int2 coord = (int2)(get_global_id(0), get_global_id(1));
  int4 bgra = read_imagei(srcImg, smp, coord); //The byte order is BGRA
  //printf("%v4i\n", bgra); //For Debug only

  if(oldRGB.x == bgra.z && oldRGB.y == bgra.y && oldRGB.z == bgra.x)
  {
    bgra.x = (int) newRGB.z;
    bgra.y = (int) newRGB.y;
    bgra.z = (int) newRGB.x;
    bgra.w = 255;
  }
  write_imagei(dstImg, coord, bgra);
}

我可以成功编译并执行此代码。问题是从gpu返回的图像在颜色上是非常不同的。使用printf命令，我发现当我在内核中使用read_imagei时使用Bitmap.GetPixel时，我在输入图像中获得的RGB值在C＃中是不同的。他是我在一次测试中得到的。

Bitmap中包含6种颜色的RGB值，使用Bitmap.GetPixel：

255,52,12
226105123
255206206
246167180
250250250
255213213

传递给gpu的相同位图使用read_imagei返回以下RGB值：

255,0,0
255192192
255120120
255160160
255255255
255220220

我不确定这种差异是由图像格式化问题引起的，还是可能是由于在read_imagei命令期间gpu正在执行某种舍入。我是一名自学成才的程序员，我已经达到了谷歌搜索能力的极限。对此问题的任何解释或帮助将不胜感激。

谢谢，

******** 更新 ********

所以我用另一种颜色的图像进行了另一次测试。当我使用Bitmap.GetPixel读取C＃中的像素时，我发现了10种独特的颜色。当我使用read_imagei读取gpu上的图像时，它返回了17种独特的颜色。我不知道为什么颜色数量会有这种差异。

Bitmap中包含10种颜色的RGB值，使用Bitmap.GetPixel：

217,0,40
250,133,0
255,157,37
254,231,61
7,113,26
28,136,23
1110165
121,54,102
96,78,135
247,111,1

传递给gpu的相同位图使用read_imagei返回以下RGB值：

228,3,3
255,140,0
247,111,1
255,237,0
255,137,0
255,173,0
255,241,0
255,240,0
0,118,40
0,128,38
0,77,255
0,75,255
0,130,0
0,94,205
121,0,127
117,7,135
56,61,222

OpenCL read_imagei返回错误的值

0 个答案: