OpenCL:执行UNREACHABLE

时间:2013-11-12 09:36:39

标签: opencl amd-processor

我有一个通用内核,用于计算数组元素的部分和,并将结果写入临时缓冲区。

/*
 * FUNC(a, b) folds the value `a` into the accumulator `b`.
 *
 * Exactly one of FUNC_SUM, FUNC_ABS_SUM or FUNC_SQR_SUM must evaluate to a
 * non-zero value when this file is compiled (presumably passed as a build
 * option -- confirm against the host code); otherwise compilation stops
 * with the #error below.
 *
 *   FUNC_SUM      b += a
 *   FUNC_ABS_SUM  b += |a|   (compared against (dstT)(0))
 *   FUNC_SQR_SUM  b += a * a
 *
 * Each expansion is a single parenthesized expression with NO trailing
 * semicolon, so `FUNC(x, y);` is exactly one statement and stays safe inside
 * an unbraced if/else. Arguments are parenthesized so that compound
 * expressions expand with the intended precedence.
 *
 * NOTE: `a` is evaluated more than once in the ABS and SQR variants; pass
 * only side-effect-free expressions.
 */
#if FUNC_SUM
#define FUNC(a, b) ((b) += (a))
#elif FUNC_ABS_SUM
#define FUNC(a, b) ((b) += ((a) >= (dstT)(0) ? (a) : -(a)))
#elif FUNC_SQR_SUM
#define FUNC(a, b) ((b) += (a) * (a))
#else
#error No sum function
#endif

/*
 * Reduces `src` to one partial result per work-group, stored in dst[group id].
 *
 * Each work-item accumulates a strided subset of the first `elemnum` logical
 * elements with FUNC, then the work-group combines the per-item totals in
 * local memory and work-item 0 writes the group's result.
 *
 * cols, invalid_cols  every `cols` logical elements, `invalid_cols` source
 *                     elements are skipped when computing the source index
 *                     (looks like row padding -- confirm against the caller)
 * offset              index of the first source element
 * elemnum             number of logical elements to accumulate
 * groupnum            used to derive the per-pass stride, groupnum * 256
 * src                 input buffer, converted with convertToDstT
 * dst                 output buffer, indexed by work-group id
 *
 * NOTE(review): the constants 128 and 256 suggest the kernel expects
 * work-groups of exactly 256 work-items -- confirm against the launch code.
 */
__kernel void sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
                                __global srcT *src, __global dstT *dst)
{
    const int local_idx = get_local_id(0);
    const int group_idx = get_group_id(0);
    const int stride = groupnum << 8; /* groupnum * 256 elements per pass */

    __local dstT partial[128];
    dstT acc = (dstT)(0);

    /* Per-work-item accumulation over a strided range of logical elements. */
    for (int elem = get_global_id(0); elem < elemnum; elem += stride)
    {
        const int src_idx = offset + elem + (elem / cols) * invalid_cols;
        const dstT value = convertToDstT(src[src_idx]);
        FUNC(value, acc);
    }

    /* Work-items 128 and above store their totals into slots 0..127. */
    if (local_idx >= 128)
        partial[local_idx - 128] = acc; // ?? (the line the question refers to)
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Work-items below 128 add their own total to the stored one. */
    if (local_idx <= 127)
        partial[local_idx] = acc + partial[local_idx];
    barrier(CLK_LOCAL_MEM_FENCE);

    /*
     * Pairwise reduction of the 128 slots: 64, 32, ..., 1 active work-items.
     * The barrier sits outside the `if` so every work-item reaches it.
     */
    for (int half = 64; half >= 1; half /= 2)
    {
        if (local_idx < half)
            partial[local_idx] = partial[local_idx] + partial[local_idx + half];
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    if (local_idx == 0)
        dst[group_idx] = partial[0];
}

此代码在标记为 `// ??` 的那一行失败,并显示“UNREACHABLE execution!”消息。这段代码有什么问题吗?是否有变通方法可以避免此错误?

目标平台:AMD GPU

0 个答案:

没有答案