我有一个 OpenCL 程序,想测量其中数据传输和内核执行所花费的时间。
我尝试通过创建启用了性能分析(profiling)的命令队列来实现这一点,但是在下面这一行之后
command_queue = clCreateCommandQueue(context, CL_DEVICE, CL_QUEUE_PROFILING_ENABLE, &err);
还有这样一行
cl_device_id device_id = cluInitDevice(CL_DEVICE, &context, &command_queue);
它由于上下文约束而产生了一些问题。
我该如何处理这个约束?
感谢。
答案 0 :(得分:0)
你是 Uni Ibk 的学生吗?这个函数来自 cl_utils.h,是我们从教授那里拿到的。不过,代码如下。
/**
 * Select an OpenCL device by its index across all platforms and optionally
 * create a context and a command queue for it.
 *
 * Devices are counted platform by platform, in the order reported by
 * clGetPlatformIDs / clGetDeviceIDs(CL_DEVICE_TYPE_ALL). Platforms that
 * report CL_DEVICE_NOT_FOUND contribute zero devices to the count.
 *
 * @param num          Zero-based index of the desired device.
 * @param out_context  If non-NULL, receives a context created for the
 *                     selected device.
 * @param out_queue    If non-NULL (and out_context is non-NULL), receives a
 *                     command queue created with properties 0, i.e. WITHOUT
 *                     CL_QUEUE_PROFILING_ENABLE. Callers that need profiling
 *                     can pass NULL here and create their own queue on
 *                     *out_context with the returned device id.
 * @return The selected device id. NULL if num is out of range and no context
 *         was requested; if a context was requested, the failing
 *         clCreateContext result is handed to CLU_ERRCHECK (defined
 *         elsewhere - presumably it aborts; confirm against its definition).
 */
cl_device_id cluInitDevice(size_t num, cl_context *out_context, cl_command_queue *out_queue) {
    // get platform ids
    cl_uint ret_num_platforms;
    CLU_ERRCHECK(clGetPlatformIDs(0, NULL, &ret_num_platforms), "Failed to query number of ocl platforms");
    cl_platform_id *ret_platforms = (cl_platform_id*)alloca(sizeof(cl_platform_id)*ret_num_platforms);
    CLU_ERRCHECK(clGetPlatformIDs(ret_num_platforms, ret_platforms, NULL), "Failed to retrieve ocl platforms");

    // get device id of desired device
    cl_device_id device_id = NULL;
    for(cl_uint i=0; i<ret_num_platforms; ++i) {
        cl_uint ret_num_devices = 0;
        cl_int query_err = clGetDeviceIDs(ret_platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &ret_num_devices);
        if(query_err == CL_DEVICE_NOT_FOUND) {
            // a platform without devices is not an error, it just holds none of the indices
            continue;
        }
        CLU_ERRCHECK(query_err, "Failed to query number of ocl devices");

        if(num < ret_num_devices) {
            // desired device is on this platform, select
            cl_device_id *ret_devices = (cl_device_id*)alloca(sizeof(cl_device_id)*ret_num_devices);
            CLU_ERRCHECK(clGetDeviceIDs(ret_platforms[i], CL_DEVICE_TYPE_ALL, ret_num_devices, ret_devices, NULL), "Failed to retrieve ocl devices");
            device_id = ret_devices[num];
            // stop here: querying further platforms is unnecessary, and a failure
            // on one of them must not invalidate a device that was already found
            break;
        }

        // skip past this platform's devices; no underflow since num >= ret_num_devices here
        num -= ret_num_devices;
    }

    // create opencl context if requested
    if(out_context != NULL) {
        cl_int err;
        *out_context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
        CLU_ERRCHECK(err, "Failed to create ocl context");

        // create command queue if requested
        if(out_queue != NULL) {
            *out_queue = clCreateCommandQueue(*out_context, device_id, 0, &err);
            CLU_ERRCHECK(err, "Failed to create ocl command queue");
        }
    }
    return device_id;
}