Question

有人可以指导我如何在 C# 中通过 OpenCL 使用 GPU 执行矩阵乘法吗？

我在这里看过opencl示例： https://www.codeproject.com/Articles/1116907/How-to-Use-Your-GPU-in-NET

但是我不确定如何进行矩阵乘法。

Answer 1

是的，正如 doqtor 所说，您需要先把矩阵展平为一维数组。下面是一个向内核传递多个参数的例子：

/// <summary>
/// Minimal Cloo/OpenCL sample: uploads two flattened int arrays plus their length
/// to the GPU and runs a kernel that prints every pair of elements.
/// </summary>
class Program
{
    /// <summary>
    /// OpenCL C source of the <c>Calc</c> kernel.
    /// The kernel receives two int buffers and their element count and prints
    /// "m1[i] / m2[i]" for every index.
    /// </summary>
    static string CalculateKernel
    {
        get
        {
            return @"
            kernel void Calc(global int* m1, global int* m2, int size) 
            {
                for(int i = 0; i < size; i++)
                {
                    printf("" %d / %d\n"",m1[i],m2[i] );
                }
            }";
        }
    }

    /// <summary>
    /// Builds the kernel for the first GPU of the first platform, binds the two
    /// input arrays and their length as kernel arguments, and runs the kernel once.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        int[] r1 = new int[]
            {1, 2, 3, 4};

        int[] r2 = new int[]
            {4, 3, 2, 1};

        int rowSize = r1.Length;

        // pick first platform
        ComputePlatform platform = ComputePlatform.Platforms[0];

        // Every Cloo object below wraps a native OpenCL handle, so each one is
        // scoped with `using` to guarantee it is released even if a later step throws.

        // create context with all gpu devices
        using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
            new ComputeContextPropertyList(platform), null, IntPtr.Zero))
        // create a command queue with first gpu found
        using (ComputeCommandQueue queue = new ComputeCommandQueue(context,
            context.Devices[0], ComputeCommandQueueFlags.None))
        // create program with opencl source
        using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
        {
            // compile opencl source
            program.Build(null, null, null, IntPtr.Zero);

            // load chosen kernel from program
            using (ComputeKernel kernel = program.CreateKernel("Calc"))
            // Input buffers. CopyHostPointer copies the array contents at creation
            // time; UseHostPointer would make the device keep a raw pointer to a
            // managed array that is not pinned after the constructor returns.
            using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r1))
            using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r2))
            {
                kernel.SetMemoryArgument(0, row1Buffer); // m1
                kernel.SetMemoryArgument(1, row2Buffer); // m2
                kernel.SetValueArgument(2, rowSize);     // size

                // execute kernel; the loop over all elements lives inside the kernel
                queue.ExecuteTask(kernel, null);

                // wait for completion
                queue.Finish();
            }
        }

        Console.WriteLine("Finished");
        Console.ReadKey();
    }
}

下面是另一个示例，演示如何从 GPU 缓冲区读回计算结果：

/// <summary>
/// Cloo/OpenCL sample that runs a kernel over two flattened int arrays and reads
/// a third, kernel-written array back from the GPU.
/// </summary>
class Program
{
    /// <summary>
    /// OpenCL C source of the <c>Calc</c> kernel.
    /// For every index it prints "m1[i] / m2[i]" and stores <c>m2[i] * 4</c> in
    /// <c>m3[i]</c>. Replace the loop body with a matrix algorithm to compute a
    /// real product into <c>m3</c>.
    /// </summary>
    static string CalculateKernel
    {
        get
        {
            return @"
            kernel void Calc(global int* m1, global int* m2, int size, global int* m3) 
            {
                for(int i = 0; i < size; i++)
                {
                    int val = m2[i];
                    printf("" %d / %d\n"",m1[i],m2[i] );
                    m3[i] = val * 4;
                }
            }";
        }
    }

    /// <summary>
    /// Builds the kernel for the first GPU of the first platform, runs it once,
    /// copies the result buffer back into a managed array and prints it.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        int[] r1 = new int[]
            {8, 2, 3, 4};

        int[] r2 = new int[]
            {4, 3, 2, 5};

        int rowSize = r1.Length;

        // The result array is sized from the input instead of a hard-coded 4.
        int[] r3 = new int[rowSize];

        // pick first platform
        ComputePlatform platform = ComputePlatform.Platforms[0];

        // Every Cloo object below wraps a native OpenCL handle, so each one is
        // scoped with `using` to guarantee it is released even if a later step throws.

        // create context with all gpu devices
        using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
            new ComputeContextPropertyList(platform), null, IntPtr.Zero))
        // create a command queue with first gpu found
        using (ComputeCommandQueue queue = new ComputeCommandQueue(context,
            context.Devices[0], ComputeCommandQueueFlags.None))
        // create program with opencl source
        using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
        {
            // compile opencl source
            program.Build(null, null, null, IntPtr.Zero);

            // load chosen kernel from program
            using (ComputeKernel kernel = program.CreateKernel("Calc"))
            // Input buffers. CopyHostPointer copies the array contents at creation
            // time; UseHostPointer would make the device keep a raw pointer to a
            // managed array that is not pinned after the constructor returns.
            using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r1))
            using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r2))
            // Output buffer: the kernel writes m3, so it must not be ReadOnly.
            // It is allocated on the device by element count; no host array is needed.
            using (ComputeBuffer<int> resultBuffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.WriteOnly, r3.Length))
            {
                kernel.SetMemoryArgument(0, row1Buffer);   // m1
                kernel.SetMemoryArgument(1, row2Buffer);   // m2
                kernel.SetValueArgument(2, rowSize);       // size
                kernel.SetMemoryArgument(3, resultBuffer); // m3

                // execute kernel; the loop over all elements lives inside the kernel
                queue.ExecuteTask(kernel, null);

                // wait for completion
                queue.Finish();

                // Pin r3 so the native read can write straight into it; the handle
                // is released in `finally` so a failed read cannot leak the pin.
                GCHandle arrCHandle = GCHandle.Alloc(r3, GCHandleType.Pinned);
                try
                {
                    // blocking read of r3.Length elements starting at offset 0
                    queue.Read<int>(resultBuffer, true, 0, r3.Length, arrCHandle.AddrOfPinnedObject(), null);
                }
                finally
                {
                    arrCHandle.Free();
                }
            }
        }

        Console.WriteLine("display result from gpu buffer:");
        for (int i = 0; i < r3.Length; i++)
        {
            Console.WriteLine(r3[i]);
        }

        Console.WriteLine("Finished");
        Console.ReadKey();
    }
}

您只需修改内核程序即可计算2个矩阵的乘法

上一个程序的结果：

 8 / 4
 2 / 3
 3 / 2
 4 / 5
display result from gpu buffer:
16
12
8
20
Finished

将2d展平为1d的方法非常简单：

        // Flatten a rectangular 2x3 matrix into a 1-D array in row-major order.
        int[,] twoD =
        {
            { 1, 2, 3 },
            { 3, 4, 5 }
        };

        // Enumerating a rectangular array visits its elements row by row, so
        // copying them in that order yields the row-major 1-D layout.
        int[] oneD = new int[twoD.Length];
        int next = 0;
        foreach (int cell in twoD)
        {
            oneD[next] = cell;
            next++;
        }

然后请参阅 this link，了解如何把一维数组还原为二维数组。

Answer 2

我找到了一份关于在 .NET 中使用 OpenCL 的很好的参考资料。

该网站结构合理，非常有用。它还具有矩阵乘法的案例研究示例。

OpenCL Tutorial

如何在C＃

2 个答案: