Lion 上 Xcode 4.2 中的 OpenCL 程序不断因 EXC_BAD_ACCESS 而中断

时间:2011-11-01 14:53:33

标签: xcode xcode4 osx-lion opencl

我在 Lion 上的 Xcode 4.1 中运行了一段简单的 Hello World OpenCL 代码,程序总是在 clEnqueueTask 处中断。运行 MacResearch.org 的 OpenCL 教程(OpenCL tutorials)源代码时也出现同样的情况,只是中断位置在 clEnqueueNDRangeKernel 处。lldb 给出的信息是 code=1,address=0x30。

以下是代码:

#include <stdio.h>
#include <stdlib.h>

#include <OpenCL/opencl.h>

/* Size in bytes of the device buffer and of the host array that receives the result. */
#define MEM_SIZE (128)
/* NOTE(review): not referenced anywhere in the code shown here; presumably left over
 * from a variant that loaded the kernel source from a file. */
#define MAX_SOURCE_SIZE (0x100000)

int main ()
{

    char *program_source = "\n"\
    "__kernel void hello(__global char* string) \n"\
    "{                                          \n"\
    "   string[0]  = 'H';                       \n"\
    "   string[1]  = 'e';                       \n"\
    "   string[2]  = 'l';                       \n"\
    "   string[3]  = 'l';                       \n"\
    "   string[4]  = 'o';                       \n"\
    "   string[5]  = ',';                       \n"\
    "   string[6]  = ' ';                       \n"\
    "   string[7]  = 'w';                       \n"\
    "   string[8]  = 'o';                       \n"\
    "   string[9]  = 'r';                       \n"\
    "   string[10] = 'l';                       \n"\
    "   string[11] = 'd';                       \n"\
    "   string[12] = '!';                       \n"\
    "    string[13] = '\0';                     \n"\
    "}                                          \n"\
    "\n";

    size_t source_size = sizeof(program_source);

    cl_device_id device_id         = NULL;
    cl_context context             = NULL;
    cl_command_queue command_queue = NULL;
    cl_mem memobj                  = NULL;
    cl_program program             = NULL;
    cl_kernel kernel               = NULL;
    cl_platform_id platform_id     = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret;

    char string[MEM_SIZE];


    // get platform and device information
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);

    cl_int err = 0;
    size_t returned_size = 0;
    size_t buffer_size;

    // Get some information about the returned device
    cl_char vendor_name[1024] = {0};
    cl_char device_name[1024] = {0};
    err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
    err |= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name),device_name, &returned_size);
//    assert(err == CL_SUCCESS);
    printf("Connecting to %s %s...\n", vendor_name, device_name);

    // create OpenCL context
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);

    // create command queue
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // create memory buffer
    memobj = clCreateBuffer(context,CL_MEM_READ_WRITE, MEM_SIZE*sizeof(char), NULL, &ret);

    // create kernel program from source code
    program = clCreateProgramWithSource(context, 1, (const char **)&program_source, (const size_t*)&source_size, &ret);

    // build kernel program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    // create OpenCL Kernel
    kernel = clCreateKernel(program, "hello", &ret);

    // set OpenCL kernel parameters
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);

    // Execute OpenCL kernel
    ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);

    // copy results from the memory buffer
    ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE*sizeof(char), string, 0, NULL, NULL);

    // display results
    puts(string);

    // finish up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(memobj);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);

    return 0;
}

尝试使用Guard Malloc,得到了:

GuardMalloc[OCL_HW-1453]: recording malloc stacks to disk using standard recorder
GuardMalloc[OCL_HW-1453]: Allocations will be placed on 16 byte boundaries.
GuardMalloc[OCL_HW-1453]:  - Some buffer overruns may not be noticed.
GuardMalloc[OCL_HW-1453]:  - Applications using vector instructions (e.g., SSE) should work.
GuardMalloc[OCL_HW-1453]: version 24.1
OCL_HW(1453) malloc: process 1423 no longer exists, stack logs deleted from /tmp/stack-logs.1423.OCL_HW.yL5f5u.index
OCL_HW(1453) malloc: stack logs being written into /tmp/stack-logs.1453.OCL_HW.pCjTNR.index
Connecting to NVIDIA GeForce GT 330M...

同样的代码在 Snow Leopard 和 Xcode 3 下没有任何问题。我已经把所有 .cl 文件从目标(target)中移除,确保它们不会被编译,并且已经正确链接了 OpenCL.framework。

我甚至把电脑整个抹掉,重新全新安装了 Lion 和 Xcode,但问题依旧。到了这一步,我很确定是某个很低级的错误。

- 非常感谢

1 个答案:

答案 0 :(得分:3)

你说得对——确实是个低级错误。您给 clCreateProgramWithSource 的第四个参数传了错误的值:应该传入源字符串的长度,而您传入的是指针的大小(对 char * 使用 sizeof 得到的是指针大小,而不是字符串长度)。可以这样修复:

size_t source_size = strlen(program_source);

顺便说一下,我是通过检查 clBuildProgram 的返回值发现这个问题的。返回值是 -11,即 CL_BUILD_PROGRAM_FAILURE,这意味着内核编译失败了。由于内核代码本身看起来没有问题,我在命令行上执行了:

CL_LOG_ERRORS=stdout ./test

这导致Apple OpenCL实现将编译器构建日志转储到标准输出。我看到了这个:

[CL_BUILD_ERROR] : OpenCL Build Error : Compiler build log:
<program source>:2:1: error: unknown type name '__kerne'
__kerne

<program source>:2:8: error: expected identifier or '('
__kerne

这让我立刻想到了你的源代码长度参数。

另请注意,您还需要把内核中的这一行:

string[13] = '\0';

改为:

string[13] = 0;

原因是内核源码写在 C 字符串字面量里,其中的 '\0' 会变成一个真正的 NUL 字节,使源字符串在该处被提前截断。

进行这些更改后,我在Macbook Pro上看到了这一点:

Connecting to AMD ATI Radeon HD 6490M...
Hello, world!