我最近开始使用openCL.Net并遇到了一些图像处理代码的问题。我要做的是针对图像中的特定颜色并将其更改为不同的RGB值,以减少图像中的颜色总数。此过程的工作流程如下:
我所有这些代码都在C#中运行,但现在我想在gpu上完成像素更改,因为连续多次运行250000像素可能需要几分钟。
using OpenCL.Net;
private Context _context;
private Device _device;
public void SetupOpenCL()
{
ErrorCode error;
Platform[] platforms = Cl.GetPlatformIDs(out error);
List<Device> devicesList = new List<Device>();
CheckErr(error, "Cl.GetPlatformIDs");
foreach (Platform platform in platforms)
{
string platformName = Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString();
Console.WriteLine("Platform: " + platformName);
CheckErr(error, "Cl.GetPlatformInfo");
//We will be looking only for GPU devices
foreach (Device device in Cl.GetDeviceIDs(platform, DeviceType.Gpu, out error))
{
CheckErr(error, "Cl.GetDeviceIDs");
//Console.WriteLine("Device: " + device.ToString());
devicesList.Add(device);
}
}
if (devicesList.Count <= 0)
{
//Console.WriteLine("No devices found.");
//Set bool used to signify that code processed on CPU only
return;
}
_device = devicesList[0];
if (Cl.GetDeviceInfo(_device, DeviceInfo.ImageSupport, out error).CastTo<Bool>() == Bool.False)
{
//Console.WriteLine("No image support.");
//Set bool used to signify that code processed on CPU only
return;
}
_context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out error); //Second parameter is amount of devices
CheckErr(error, "Cl.CreateContext");
}
通过Visual Studio,我可以看到我的电脑配备了Intel(R)Core(TM)i7-4500U CPU @ 1.80GHz和Intel(R)HD Graphics 4400(GPU)。设置我的内核的代码如下:
public Bitmap ImagingTest2(Bitmap inputBM, int[] oldRGB, int[] newRGB)
{
ErrorCode error;
//Load and compile kernel source code.
string programPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "\\ImagingTest2.cl";
//The path to the source file may vary
if (!System.IO.File.Exists(programPath))
{
Console.WriteLine("Program doesn't exist at path " + programPath);
return inputBM;
}
string programSource = System.IO.File.ReadAllText(programPath);
using (OpenCL.Net.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
{
CheckErr(error, "Cl.CreateProgramWithSource");
//Compile kernel source
error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
CheckErr(error, "Cl.BuildProgram");
//Check for any compilation errors
if (Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>()
!= BuildStatus.Success)
{
CheckErr(error, "Cl.GetProgramBuildInfo");
Console.WriteLine("Cl.GetProgramBuildInfo != Success");
Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
return inputBM;
}
//Create the required kernel (entry function)
Kernel kernel = Cl.CreateKernel(program, "imagingTest", out error);
CheckErr(error, "Cl.CreateKernel");
int intPtrSize = 0;
intPtrSize = Marshal.SizeOf(typeof(IntPtr));
//Image's RGBA data converted to an unmanaged[] array
byte[] inputByteArray;
//OpenCL memory buffer that will keep our image's byte[] data.
Mem inputImage2DBuffer;
OpenCL.Net.ImageFormat clImageFormat = new OpenCL.Net.ImageFormat(ChannelOrder.RGBA, ChannelType.Unsigned_Int8);
int inputImgWidth, inputImgHeight;
int inputImgBytesSize;
int inputImgStride;
inputImgWidth = inputBM.Width;
inputImgHeight = inputBM.Height;
//Get raw pixel data of the bitmap
//The format should match the format of clImageFormat
BitmapData bitmapData = inputBM.LockBits(new Rectangle(0, 0, inputBM.Width, inputBM.Height),
ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);//inputImage.PixelFormat);
inputImgStride = bitmapData.Stride;
inputImgBytesSize = bitmapData.Stride * bitmapData.Height;
//Copy the raw bitmap data to an unmanaged byte[] array
inputByteArray = new byte[inputImgBytesSize];
Marshal.Copy(bitmapData.Scan0, inputByteArray, 0, inputImgBytesSize);
//Allocate OpenCL image memory buffer
inputImage2DBuffer = (Mem)Cl.CreateImage2D(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, clImageFormat,
(IntPtr)bitmapData.Width, (IntPtr)bitmapData.Height,
(IntPtr)0, inputByteArray, out error);
CheckErr(error, "Cl.CreateImage2D input");
//Unmanaged output image's raw RGBA byte[] array
byte[] outputByteArray = new byte[inputImgBytesSize];
//Allocate OpenCL image memory buffer
Mem outputImage2DBuffer = (Mem)Cl.CreateImage2D(_context, MemFlags.CopyHostPtr |
MemFlags.WriteOnly, clImageFormat, (IntPtr)inputImgWidth,
(IntPtr)inputImgHeight, (IntPtr)0, outputByteArray, out error);
CheckErr(error, "Cl.CreateImage2D output");
//Pass the memory buffers to our kernel function
error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, inputImage2DBuffer);
error |= Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, outputImage2DBuffer);
error = Cl.SetKernelArg(kernel, 2, (IntPtr)(sizeof(int) * 4), oldRGB);
error = Cl.SetKernelArg(kernel, 3, (IntPtr)(sizeof(int) * 4), newRGB);
CheckErr(error, "Cl.SetKernelArg");
//Create a command queue, where all of the commands for execution will be added
CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
CheckErr(error, "Cl.CreateCommandQueue");
Event clevent;
//Copy input image from the host to the GPU.
IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 }; //x, y, z
IntPtr[] regionPtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 }; //x, y, z
IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 };
error = Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, Bool.True,
originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputByteArray, 0, null, out clevent);
CheckErr(error, "Cl.EnqueueWriteImage");
//Execute our kernel (OpenCL code)
error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, workGroupSizePtr, null, 0, null, out clevent);
CheckErr(error, "Cl.EnqueueNDRangeKernel");
//Wait for completion of all calculations on the GPU.
error = Cl.Finish(cmdQueue);
CheckErr(error, "Cl.Finish");
//Read the processed image from GPU to raw RGBA data byte[] array
error = Cl.EnqueueReadImage(cmdQueue, outputImage2DBuffer, Bool.True, originPtr, regionPtr,
(IntPtr)0, (IntPtr)0, outputByteArray, 0, null, out clevent);
CheckErr(error, "Cl.clEnqueueReadImage");
//Clean up memory
Cl.ReleaseKernel(kernel);
Cl.ReleaseCommandQueue(cmdQueue);
Cl.ReleaseMemObject(inputImage2DBuffer);
Cl.ReleaseMemObject(outputImage2DBuffer);
//Get a pointer to our unmanaged output byte[] array
GCHandle pinnedOutputArray = GCHandle.Alloc(outputByteArray, GCHandleType.Pinned);
IntPtr outputBmpPointer = pinnedOutputArray.AddrOfPinnedObject();
//Create a new bitmap with processed data and save it to a file.
Bitmap outputBitmap = new Bitmap(inputImgWidth, inputImgHeight,
inputImgStride, PixelFormat.Format32bppArgb, outputBmpPointer);
inputBM.UnlockBits(bitmapData);
pinnedOutputArray.Free();
return outputBitmap;
}
}
其中inputBM是一个已知的Bitmap,我有每个像素的RGB数据。 oldRGB是我想要更改的像素的RGB值。 newRGB是我想要将像素更改为的RGB值。
最后,这是内核本身的代码:
__kernel void imagingTest(__read_only image2d_t srcImg,__write_only image2d_t dstImg, int4 oldRGB, int4 newRGB)
{
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates
CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST;
int2 coord = (int2)(get_global_id(0), get_global_id(1));
int4 bgra = read_imagei(srcImg, smp, coord); //The byte order is BGRA
//printf("%v4i\n", bgra); //For Debug only
if(oldRGB.x == bgra.z && oldRGB.y == bgra.y && oldRGB.z == bgra.x)
{
bgra.x = (int) newRGB.z;
bgra.y = (int) newRGB.y;
bgra.z = (int) newRGB.x;
bgra.w = 255;
}
write_imagei(dstImg, coord, bgra);
}
我可以成功编译并执行此代码。问题是从gpu返回的图像在颜色上是非常不同的。使用printf命令,我发现当我在内核中使用read_imagei时使用Bitmap.GetPixel时,我在输入图像中获得的RGB值在C#中是不同的。他是我在一次测试中得到的。
Bitmap中包含6种颜色的RGB值,使用Bitmap.GetPixel:
传递给gpu的相同位图使用read_imagei返回以下RGB值:
我不确定这种差异是由图像格式化问题引起的,还是可能是由于在read_imagei命令期间gpu正在执行某种舍入。我是一名自学成才的程序员,我已经达到了谷歌搜索能力的极限。对此问题的任何解释或帮助将不胜感激。
谢谢,
******** 更新 ********
所以我用另一种颜色的图像进行了另一次测试。当我使用Bitmap.GetPixel读取C#中的像素时,我发现了10种独特的颜色。当我使用read_imagei读取gpu上的图像时,它返回了17种独特的颜色。我不知道为什么颜色数量会有这种差异。
Bitmap中包含10种颜色的RGB值,使用Bitmap.GetPixel:
传递给gpu的相同位图使用read_imagei返回以下RGB值: