我的内核函数必须存储一个包含7500个值的中间私有数组。运行代码时，我的屏幕会空白一秒钟，恢复之后结果没有显示出来。我猜测是私有内存不足，于是稍微修改了代码：现在它把每个新计算出的值与到目前为止的最大值进行比较，这样就不需要创建包含7000个值的数组，而只需保存最大的值。但我仍然遇到同样的问题。那么导致屏幕空白的原因是什么呢？这是我的内核：
/*
 * Scores every row of TRG and records the indices of the two best-scoring rows.
 *
 * TRG is indexed as a row-major trgr x trgc matrix (element [r][c] at r*trgc + c).
 * For each row `pred`, the kernel counts the ordered column pairs (tars, preds)
 * for which
 *     TRG[pred][preds] > TRG[pred][tars] - tol  &&
 *     TRG[pred][preds] > TRG[pred][tars] + tol          (tol = 0.5)
 * holds, and multiplies that count by ENT[pred] to obtain the row's score.
 *
 * Parameters:
 *   trgr  number of rows of TRG (and number of ENT entries read)
 *   trgc  number of columns of TRG
 *   TRG   input matrix, trgr * trgc floats
 *   ENT   per-row weight, trgr floats
 *   RES1  output; RES1[i] receives the index (as float) of the highest-scoring row
 *   RES2  output; RES2[i] receives the index (as float) of the second-highest row
 * where i is this work-item's global id in dimension 0.
 *
 * Ties are resolved with strict '>' comparisons, so the earliest row wins the
 * top spot. With trgr == 0 both outputs are 0; with trgr == 1 RES2[i] is 0.
 *
 * NOTE(review): nothing computed below depends on i, so every work-item repeats
 * the same trgr * trgc * trgc iterations and writes the same two values. If the
 * host launches many work-items, the kernel may run long enough for a display
 * driver watchdog to reset the GPU -- confirm against the launch size, and
 * consider splitting the rows across work-items or launching a single one.
 */
__kernel void sampleKernel(
    const uint trgr,
    const uint trgc,
    __global const float *TRG,
    __global const float *ENT,
    __global float *RES1,
    __global float *RES2)
{
    const float tol = 0.5f;
    const size_t i = get_global_id(0);

    /* Best and second-best scores seen so far, and the rows they came from. */
    float big1 = 0.0f, big2 = 0.0f;
    float g1 = 0.0f, g2 = 0.0f;

    for (uint pred = 0; pred < trgr; pred++) {
        /* Widen before multiplying so the row offset cannot wrap at 32 bits. */
        __global const float *row = TRG + (size_t)pred * trgc;
        float val = 0.0f;

        for (uint tars = 0; tars < trgc; tars++) {
            /* Both thresholds depend only on tars; compute them once per column. */
            const float ref = row[tars];
            const float lo = ref - tol;
            const float hi = ref + tol;

            for (uint preds = 0; preds < trgc; preds++) {
                const float cur = row[preds];
                /*
                 * NOTE(review): with tol > 0 the first comparison is implied by
                 * the second, so this counts cur > ref + tol. If the intent was
                 * "within tol of ref", the second test should be cur < hi --
                 * left unchanged here; confirm with the author.
                 */
                if (cur > lo && cur > hi) {
                    val += 1.0f;
                }
            }
        }
        val *= ENT[pred];

        if (pred == 0) {
            /* First row seeds the maximum. */
            big1 = val;
            g1 = (float)pred;
        } else if (val > big1) {
            /* New maximum: the previous maximum becomes the runner-up. */
            big2 = big1;
            g2 = g1;
            big1 = val;
            g1 = (float)pred;
        } else if (pred == 1 || val > big2) {
            /*
             * The second row always seeds the runner-up when it does not beat
             * the first; previously big2 stayed uninitialized in that case and
             * later comparisons against it were undefined.
             */
            big2 = val;
            g2 = (float)pred;
        }
    }

    RES1[i] = g1;
    RES2[i] = g2;
}
上面的内核源码存储在 private static String programSource; 中。
下面是主机端的代码：
//writing to GPU
clSetKernelArg(kernel, 0, Sizeof.cl_uint, Pointer.to(new int[]{7000}));
clSetKernelArg(kernel, 1, Sizeof.cl_uint, Pointer.to(new int[]{36}));
clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[0]));
clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[1]));
clSetKernelArg(kernel, 4, Sizeof.cl_mem, Pointer.to(memObjects[2]));
clSetKernelArg(kernel, 5, Sizeof.cl_mem, Pointer.to(memObjects[3]));
//reading from GPU
clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, m * n * Sizeof.cl_float, pres1, 0, null, null);
clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0, m * n * Sizeof.cl_float, pres2, 0, null, null);