我是OpenCL编程的新手,但已有多年(嵌入式)C语言编程经验。我遇到的问题是clFinish返回-36;据我所知(AFAIK),这表示内核未能正常执行。然而我怎么也找不出原因。同一个测试内核在程序的另一个版本中运行正常,而两个版本之间的差异看起来微不足道。下面是去掉了错误检查的简化代码(在完整代码中,其余所有返回值都经过检查,并且均为CL_SUCCESS)。
/* Number of int elements processed by the kernel (one work-item per element). */
#define INPUT_ARR_SIZE 128

/* Host-side input data uploaded to the device; starts out all zeros. */
int inArr[INPUT_ARR_SIZE] = {0};
int main()
{
cl_uint num = 1;
cl_int ret;
cl_platform_id platforms[1];
cl_device_id devices[1];
cl_context_properties ccp[3];
cl_context ctx;
cl_command_queue queue;
cl_program program;
cl_kernel kernel;
cl_mem mem1, mem2;
cl_int result;
clGetPlatformIDs(num, platforms, &num);
printf("%i Platforms\n", num);
num = 1;
clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, num, devices, &num);
printf("Devices: %d\n", num);
ccp[0] = CL_CONTEXT_PLATFORM;
ccp[1] = (intptr_t)platforms[0];
ccp[2] = 0;
ctx = clCreateContext(ccp, 1, devices, NULL, NULL, &ret);
queue = clCreateCommandQueue(ctx, devices[0], (cl_ulong)0, &ret);
program = clCreateProgramWithSource(ctx, 1, &KernelSrcPtr, NULL, &ret);
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
printf("Build success\n");
mem1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(inArr), NULL, &ret);
mem2 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(inArr), NULL, &ret);
kernel = clCreateKernel(program, "hello", &ret);
ret = clEnqueueWriteBuffer(queue, mem1, CL_TRUE, 0, sizeof(inArr), inArr, 0, NULL, NULL);
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem1);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem2);
cl_uint num2 = INPUT_ARR_SIZE;
ret = clEnqueueNDRangeKernel(queue, kernel, 1, 0, (size_t*)&num2, NULL, 0, NULL, NULL);
ret |= clFinish(queue);
}
这是内核:
/*
 * OpenCL C source of the "hello" kernel. Each work-item reads mem1[id],
 * computes ((x * 2 + 5) * 6) / 3 in int arithmetic and stores the result in
 * mem2[id], where id is get_global_id(0).
 *
 * Adjacent string literals are concatenated by the compiler, so no line
 * continuations are needed.
 */
const char KernelSrc[] =
    "__kernel void hello(__global const int *mem1, __global int *mem2)\n"
    "{\n"
    " size_t id = get_global_id(0);\n"
    " int intm = mem1[id] * 2;\n"
    " intm = intm + 5;\n"
    " intm = intm * 6;\n"
    " mem2[id] = intm / 3;\n"
    "}\n\n";
另外还有一个有趣的现象:如果我在内核中注释掉 mem2[id] = intm / 3; 这一行,程序就永远无法执行结束。
有人能帮我解决这些问题吗?