Question

原来的问题已经解决了（如果你感兴趣，可以看分隔线下面的第二部分）。现在我有一个新问题：为什么 `#define BLOCK_DIM 16;` 会导致下面的函数出错？直接写数字 16 就没有问题。

以下是错误

     expected a "]"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                           ^

     line 110: error:
              expected a ")"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                                        ^

     line 110: error: operand
              of "*" must be a pointer
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;

error:
          expected a ";"
          int Idout = get_local_id(0)*(BLOCK_DIM+1)+get_local_id(1);
                                                  ^

以及函数：

    // Transposes a matrix of float2 elements from datain (rows of `width`
    // elements, `height` rows) into dataout (rows of `height` elements),
    // staging each tile through __local memory.
    // NOTE(review): the hard-coded 32 only matches the global ids if the kernel
    // is launched with a 32 x 32 work-group -- confirm against the host code.
    __kernel void   transpose(
             __global float2* dataout, 
             __global float2* datain, 
             int width, int height)

// width = N (signal length) 
// height = batch_size (number of signals in a batch)

{
// read the matrix tile into local memory

// Tile rows are stored (32 + 1) elements apart
// (presumably padding to avoid local-memory bank conflicts -- TODO confirm).
__local float2 block[32 * (32 + 1)] ;
   unsigned int xIndex = get_global_id(0);
   unsigned int yIndex = get_global_id(1);

    // Work-items outside the matrix skip the load but still reach the barrier below.
    if((xIndex < width) && (yIndex < height))
    {
            unsigned int index_in = yIndex * width + xIndex;
                       // Tile slot: row = local id 1, column = local id 0.
                       int Idin = get_local_id(1)*(32+1)+get_local_id(0);
                       block[Idin]=  datain[index_in];
    }

// Every work-item in the group executes this barrier (it is outside both ifs),
// so the tile writes above are visible before any work-item reads the tile.
barrier(CLK_LOCAL_MEM_FENCE);

// write the transposed matrix tile to global memory

             // Group ids are swapped relative to the read phase: the tile lands
             // at the mirrored position in the output.
             xIndex = get_group_id(1) * 32 + get_local_id(0);
             yIndex = get_group_id(0) * 32 + get_local_id(1);

    if((xIndex < height) && (yIndex < width))
    {
        unsigned int index_out = yIndex * height + xIndex;
        // Local ids are swapped relative to Idin, so the tile is read transposed.
        int Idout = get_local_id(0)*(32+1)+get_local_id(1);
                dataout[index_out] = block[Idout];
    }

}

===============================

我正在努力提高图像 2D FFT 的性能。经过基准测试，我发现转置函数是拖慢程序的原因，所以我用一个更优化的版本替换了它。

但在那之后，所有之前工作正常的函数都返回了 CL_INVALID_KERNEL_NAME 错误码。除了转置函数和主机代码中的 clSetKernelArg 之外，我没有改动任何其他东西，所以我不知道问题出在哪里。希望你们能帮帮我 :)

更新：下面是错误信息。请不要在意行号 :) 这些行在我看来很正常。有什么不对的地方吗？

错误：

     expected a "]"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                           ^

     line 110: error:
              expected a ")"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                                        ^

     line 110: error: operand
              of "*" must be a pointer
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;

error:
          expected a ";"
          int Idout = get_local_id(0)*(BLOCK_DIM+1)+get_local_id(1);
                                                  ^

以下是 kernel file

新的：

// Tile edge length used by the transpose kernel below.
// The replacement text is substituted verbatim wherever BLOCK_DIM appears, so
// the definition must not end with a semicolon: "16;" would be pasted into
// every expression that uses the macro.
// NOTE(review): only matches the global ids if the kernel is launched with a
// BLOCK_DIM x BLOCK_DIM work-group -- confirm against the host code.
#define BLOCK_DIM 16

// Transposes a matrix of float2 elements from datain (rows of `width`
// elements, `height` rows) into dataout (rows of `height` elements),
// staging each tile through __local memory.
__kernel void   transpose(
             __global float2* dataout, 
             __global float2* datain, 
             int width, int height)

// width = N (signal length) 
// height = batch_size (number of signals in a batch)

{
// read the matrix tile into local memory

// Tile rows are stored (BLOCK_DIM + 1) elements apart
// (presumably padding to avoid local-memory bank conflicts -- TODO confirm).
__local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
   unsigned int xIndex = get_global_id(0);
   unsigned int yIndex = get_global_id(1);

    // Work-items outside the matrix skip the load but still reach the barrier below.
    if((xIndex < width) && (yIndex < height))
    {
            unsigned int index_in = yIndex * width + xIndex;
                       // Tile slot: row = local id 1, column = local id 0.
                       int Idin = get_local_id(1)*(BLOCK_DIM+1)+get_local_id(0);
                       block[Idin]=  datain[index_in];
    }

// Every work-item in the group executes this barrier (it is outside both ifs),
// so the tile writes above are visible before any work-item reads the tile.
barrier(CLK_LOCAL_MEM_FENCE);

// write the transposed matrix tile to global memory

             // Group ids are swapped relative to the read phase: the tile lands
             // at the mirrored position in the output.
             xIndex = get_group_id(1) * BLOCK_DIM + get_local_id(0);
             yIndex = get_group_id(0) * BLOCK_DIM + get_local_id(1);

    if((xIndex < height) && (yIndex < width))
    {
        unsigned int index_out = yIndex * height + xIndex;
        // Local ids are swapped relative to Idin, so the tile is read transposed.
        int Idout = get_local_id(0)*(BLOCK_DIM+1)+get_local_id(1);
                dataout[index_out] = block[Idout];
    }

}

Answer 1

问题出在你的 #define 上：#define 不需要分号。基本上，`#define X Y` 会在编译之前把代码中所有出现的 “X” 替换为 “Y”；如果你在末尾加了分号，它就会成为 “Y” 的一部分，从而产生大量语法错误。#define 不是声明。

实际上，这是一个简单的解释，但它足以满足这个问题的范围（如果你想了解更多，我建议你看一下预处理器教程和文档）。

编辑函数后出现CL_INVALID_KERNEL_NAME错误（更新日志文件）

1 个答案: