OpenCL内核计时

时间:2016-10-18 09:55:54

标签: performance opencl

我有一个OpenCL程序,我想做数据传输和内核执行时间的性能测量。

我尝试通过创建启用了性能分析(profiling)的命令队列来做到这一点,但是在下面这一行之后:

command_queue = clCreateCommandQueue(context, CL_DEVICE, CL_QUEUE_PROFILING_ENABLE, &err);

还有这样一行:

cl_device_id device_id = cluInitDevice(CL_DEVICE, &context, &command_queue);

这一行会因为上下文(context)方面的限制而引发一些问题。

我该如何处理这个约束?

感谢。

1 个答案:

答案 0 :(得分:0)

你也是来自Uni Ibk吗?这个函数来自cl_utils.h,是教授提供给我们的。总之,代码如下:

     /**
      * Select an OpenCL device by index and optionally create a context and a
      * command queue for it.
      *
      * Devices are numbered consecutively across all platforms, in the order the
      * platforms are returned by clGetPlatformIDs and the devices by
      * clGetDeviceIDs (CL_DEVICE_TYPE_ALL).
      *
      * @param num          Index of the desired device in that global numbering.
      * @param out_context  If non-NULL, receives a newly created context for the
      *                     selected device (any previous value is overwritten).
      * @param out_queue    If non-NULL (and out_context is non-NULL), receives a
      *                     newly created command queue for the selected device
      *                     (any previous value is overwritten). The queue is
      *                     created with properties 0, i.e. without
      *                     CL_QUEUE_PROFILING_ENABLE.
      * @return The selected device id, or NULL if `num` is not smaller than the
      *         total number of devices found.
      *
      * NOTE(review): CLU_ERRCHECK is defined elsewhere; presumably it reports the
      * message and aborts on a non-success code -- confirm against cl_utils.h.
      */
     cl_device_id cluInitDevice(size_t num, cl_context *out_context, cl_command_queue *out_queue) {
         // get platform ids
         cl_uint ret_num_platforms;
         CLU_ERRCHECK(clGetPlatformIDs(0, NULL, &ret_num_platforms), "Failed to query number of ocl platforms");
         cl_platform_id *ret_platforms = (cl_platform_id*)alloca(sizeof(cl_platform_id)*ret_num_platforms);
         CLU_ERRCHECK(clGetPlatformIDs(ret_num_platforms, ret_platforms, NULL), "Failed to retrieve ocl platforms");

         // get device id of desired device
         cl_device_id device_id = NULL;
         for(cl_uint i=0; i<ret_num_platforms; ++i) {
             cl_uint ret_num_devices;
             CLU_ERRCHECK(clGetDeviceIDs(ret_platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &ret_num_devices), "Failed to query number of ocl devices");
             if(num < ret_num_devices) {
                 // desired device is on this platform, select
                 cl_device_id *ret_devices = (cl_device_id*)alloca(sizeof(cl_device_id)*ret_num_devices);
                 CLU_ERRCHECK(clGetDeviceIDs(ret_platforms[i], CL_DEVICE_TYPE_ALL, ret_num_devices, ret_devices, NULL), "Failed to retrieve ocl devices");
                 device_id = ret_devices[num];
                 // Stop here: continuing would rely on `num` wrapping around and
                 // would query the remaining platforms for no reason, where an
                 // unrelated error could fail an already successful selection.
                 break;
             }
             // not on this platform: make the index relative to the next one
             num -= ret_num_devices;
         }

         // create opencl context if requested
         // (if no device matched, device_id is still NULL and is passed on as is)
         if(out_context != NULL) {
             cl_int err;
             *out_context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
             CLU_ERRCHECK(err, "Failed to create ocl context");

             // create command queue if requested
             if(out_queue != NULL) {
                 // properties == 0: default in-order queue, profiling not enabled
                 *out_queue = clCreateCommandQueue(*out_context, device_id, 0, &err);
                 CLU_ERRCHECK(err, "Failed to create ocl command queue");
             }
         }
         return device_id;

     }