有人可以指导我如何在 C# 中通过 OpenCL 使用 GPU 执行矩阵乘法吗?
我在这里看过opencl示例: https://www.codeproject.com/Articles/1116907/How-to-Use-Your-GPU-in-NET
但是我不确定如何进行矩阵乘法。
答案 0 :(得分:0)
是的,正如 doqtor 所说,您需要先把矩阵展平为一维数组。下面是一个向内核传递多个参数的示例:
/// <summary>
/// Minimal Cloo/OpenCL sample: uploads two flattened int arrays to the first
/// GPU of the first platform and runs a kernel that prints each pair of
/// elements with the device-side <c>printf</c>.
/// </summary>
class Program
{
    /// <summary>
    /// OpenCL C source of the <c>Calc</c> kernel. It walks both input arrays
    /// for <c>size</c> elements and prints "m1[i] / m2[i]" for each index.
    /// </summary>
    static string CalculateKernel
    {
        get
        {
            return @"
kernel void Calc(global int* m1, global int* m2, int size)
{
for(int i = 0; i < size; i++)
{
printf("" %d / %d\n"",m1[i],m2[i] );
}
}";
        }
    }

    /// <summary>
    /// Builds the kernel, binds the two input arrays plus their length as
    /// kernel arguments, runs the kernel as a single task and waits for it.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        int[] r1 = new int[] { 1, 2, 3, 4 };
        int[] r2 = new int[] { 4, 3, 2, 1 };
        int rowSize = r1.Length;

        // pick first platform
        ComputePlatform platform = ComputePlatform.Platforms[0];

        // Every OpenCL object wraps a native handle, so each one is scoped with
        // `using`; they are released in reverse order of creation.
        // create context with all gpu devices
        using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
            new ComputeContextPropertyList(platform), null, IntPtr.Zero))
        // create a command queue with first gpu found
        using (ComputeCommandQueue queue = new ComputeCommandQueue(context,
            context.Devices[0], ComputeCommandQueueFlags.None))
        // create program with opencl source
        using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
        {
            // compile opencl source
            program.Build(null, null, null, IntPtr.Zero);

            // CopyHostPointer (not UseHostPointer): this code never pins r1/r2,
            // so the GC may move them; copying the data into device-owned
            // memory at creation avoids handing OpenCL a pointer that can go stale.
            using (ComputeKernel kernel = program.CreateKernel("Calc"))
            using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r1))
            using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r2))
            {
                kernel.SetMemoryArgument(0, row1Buffer); // m1
                kernel.SetMemoryArgument(1, row2Buffer); // m2
                kernel.SetValueArgument(2, rowSize);     // size

                // execute kernel
                queue.ExecuteTask(kernel, null);

                // wait for completion
                queue.Finish();
            }
        }

        Console.WriteLine("Finished");
        Console.ReadKey();
    }
}
另一个示例:从 GPU 缓冲区读取计算结果:
/// <summary>
/// Cloo/OpenCL sample that uploads two flattened int arrays, lets the kernel
/// write a result array on the GPU, and reads that result back to the host.
/// </summary>
class Program
{
    /// <summary>
    /// OpenCL C source of the <c>Calc</c> kernel. For each index below
    /// <c>size</c> it prints "m1[i] / m2[i]" and stores <c>m2[i] * 4</c> in
    /// <c>m3[i]</c>.
    /// </summary>
    static string CalculateKernel
    {
        get
        {
            // you could put your matrix algorithm here and take the result in array m3
            return @"
kernel void Calc(global int* m1, global int* m2, int size, global int* m3)
{
for(int i = 0; i < size; i++)
{
int val = m2[i];
printf("" %d / %d\n"",m1[i],m2[i] );
m3[i] = val * 4;
}
}";
        }
    }

    /// <summary>
    /// Builds the kernel, binds the inputs and the output buffer, runs the
    /// kernel as a single task, then copies the output buffer into a managed
    /// array and prints it.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        int[] r1 = new int[] { 8, 2, 3, 4 };
        int[] r2 = new int[] { 4, 3, 2, 5 };
        int rowSize = r1.Length;
        // Host-side destination for the result; sized from the input instead of
        // a hard-coded 4 so the arrays cannot drift apart.
        int[] r3 = new int[rowSize];

        // pick first platform
        ComputePlatform platform = ComputePlatform.Platforms[0];

        // Every OpenCL object wraps a native handle, so each one is scoped with
        // `using`; they are released in reverse order of creation.
        // create context with all gpu devices
        using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu,
            new ComputeContextPropertyList(platform), null, IntPtr.Zero))
        // create a command queue with first gpu found
        using (ComputeCommandQueue queue = new ComputeCommandQueue(context,
            context.Devices[0], ComputeCommandQueueFlags.None))
        // create program with opencl source
        using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
        {
            // compile opencl source
            program.Build(null, null, null, IntPtr.Zero);

            // Inputs use CopyHostPointer (not UseHostPointer): this code never
            // pins r1/r2, so the GC may move them after the buffer is created.
            // The result buffer is WriteOnly because the kernel stores into m3;
            // it is allocated by element count, with no host array behind it.
            using (ComputeKernel kernel = program.CreateKernel("Calc"))
            using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r1))
            using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r2))
            using (ComputeBuffer<int> resultBuffer = new ComputeBuffer<int>(context,
                ComputeMemoryFlags.WriteOnly, r3.Length))
            {
                kernel.SetMemoryArgument(0, row1Buffer);   // m1
                kernel.SetMemoryArgument(1, row2Buffer);   // m2
                kernel.SetValueArgument(2, rowSize);       // size
                kernel.SetMemoryArgument(3, resultBuffer); // m3

                // execute kernel
                queue.ExecuteTask(kernel, null);

                // wait for completion
                queue.Finish();

                // Pin r3 so its address stays fixed while the blocking read
                // copies into it; release the handle even if the read throws.
                GCHandle arrCHandle = GCHandle.Alloc(r3, GCHandleType.Pinned);
                try
                {
                    queue.Read<int>(resultBuffer, true, 0, r3.Length,
                        arrCHandle.AddrOfPinnedObject(), null);
                }
                finally
                {
                    arrCHandle.Free();
                }
            }
        }

        Console.WriteLine("display result from gpu buffer:");
        for (int i = 0; i < r3.Length; i++)
        {
            Console.WriteLine(r3[i]);
        }

        Console.WriteLine("Finished");
        Console.ReadKey();
    }
}
您只需修改内核程序即可计算2个矩阵的乘法
上一个程序的结果:
8 / 4
2 / 3
3 / 2
4 / 5
display result from gpu buffer:
16
12
8
20
Finished
将2d展平为1d的方法非常简单:
// Flatten a rectangular 2-D array into a 1-D array in row-major order:
// all of row 0 first, then all of row 1.
int[,] twoD =
{
    { 1, 2, 3 },
    { 3, 4, 5 },
};

// Enumerating a multidimensional array visits its elements row by row,
// so copying them in that order gives the row-major layout.
int[] oneD = new int[twoD.Length];
int next = 0;
foreach (int cell in twoD)
{
    oneD[next++] = cell;
}
如需把一维数组还原为二维数组,请参阅 this link。
答案 1 :(得分:0)