我试图实现并行Gauss-Seidel算法。这是我的核心功能
#define dx (float2)(1,0)
#define dy (float2)(0,1)
__kernel void rbgs(read_only image2d_t fh, read_only image2d_t vh,
global float *vvh, __global float* hx, __global float* hy,__global int* red)
{
float2 id = (float2)(get_global_id(0) << 1,get_global_id(1));
const float2 sz = (float2)(1,get_global_size(0));
const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE;
id.x += ((int)(id.y + *red) & 1);
const float s = ((read_imagef(vh,sampler,id-dx).x + read_imagef(vh,sampler,id+dx).x)/(*hy* *hy) + (read_imagef(vh,sampler,id-dy).x +
read_imagef(vh,sampler,id+dy).x)/(*hx* *hx) - read_imagef(fh, sampler,id).x)/(2/(*hx**hx)+2/(*hy**hy));
vvh[(int)dot(id,sz)] = s;
}
它编译良好,但我将图像传递给内核时出现问题
这是我尝试的内容
var rows = u.GetLength(0);
var cols = u.GetLength(1);
var array1d = new float[rows * cols];
var current = 0;
for (int i = 0; i < rows; i++)
{
for (int j = 0; j < cols; j++)
{
array1d[current++] = u[i, j];
}
}
rows = fh.GetLength(0);
cols = fh.GetLength(1);
var array1df= new float[rows * cols];
current = 0;
for (int i = 0; i < rows; i++)
{
for (int j = 0; j < cols; j++)
{
array1df[current++] = fh[i, j];
}
}
CLCalc.Program.Image2D CLfh = new CLCalc.Program.Image2D(array1df, M, N);
CLCalc.Program.Image2D CLvh = new CLCalc.Program.Image2D(array1d, M, N);
CLfh.WriteToDevice(array1df);
CLvh.WriteToDevice(array1d);
float[] solution = new float[(N+1)*(M+1)];
CLCalc.Program.Variable Stepx = new CLCalc.Program.Variable(new float[] { hx });
CLCalc.Program.Variable Stepy = new CLCalc.Program.Variable(new float[] { hy });
CLCalc.Program.Variable Red = new CLCalc.Program.Variable(new float[] { 1.0f });
CLCalc.Program.Variable Result = new CLCalc.Program.Variable(solution);
CLCalc.Program.MemoryObject[] args = new CLCalc.Program.MemoryObject[] {CLfh, CLvh, Result, Stepx, Stepy, Red} ;
gs.Execute(args, new int[] { 2, 4 });
Result.ReadFromDeviceTo(solution);
CLvh.WriteToDevice(solution);
Red.WriteToDevice(new float[] { 0.0f });
gs.Execute(args, new int[] { 2, 4 });
Result.ReadFromDeviceTo(solution);
for (int m = 0; m < (M + 1) * (N + 1); m++)
u[m / (N+1), m % (N+1)] = solution[m];
return u;
程序因运行时异常而失败:矢量长度应为4 *宽*高。我知道应该抛出异常因为Image以RGBA格式存储数据但我真的不明白我应该如何克服这个问题。
任何帮助都将不胜感激。