OpenCL中循环的时间测量

时间:2018-08-14 09:51:20

标签: c kernel opencl

我正在尝试测量在循环中执行内核所需的时间。问题是我得到的不是一个时间值,而是 5 个特定的时间值。以下是我的代码,请告诉我哪里做错了。谢谢。

/*
 * Run the kernel `b` times and report the average device execution time.
 *
 * Each iteration enqueues the kernel, reads the result buffer back into C,
 * writes C into bufferY, and adds the kernel's (END - START) profiling delta
 * to run_time_gpu.
 *
 * NOTE(review): the profiling queries only succeed if command_queue was
 * created with CL_QUEUE_PROFILING_ENABLE; queue creation is not shown here.
 */
int b = 100;
int a;
cl_event prof_event;
for (a = 0; a < b; a++)
{
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, globalws, NULL, 0, NULL, &prof_event);

    if (ret != CL_SUCCESS) {
        printf("Failed to enqueueNDRangeKernel.\n");
        exit(1);
    }

    /* Blocking transfers: check them instead of silently ignoring failures. */
    ret = clEnqueueReadBuffer(command_queue, bufferC, CL_TRUE, 0, N * sizeof(float), (void *)C, 0, NULL, NULL);
    if (ret != CL_SUCCESS) {
        printf("Failed to read bufferC.\n");
        exit(1);
    }
    ret = clEnqueueWriteBuffer(command_queue, bufferY, CL_TRUE, 0, 1 * N * sizeof(float), (void *)C, 0, NULL, NULL);
    if (ret != CL_SUCCESS) {
        printf("Failed to write bufferY.\n");
        exit(1);
    }

    clFinish(command_queue);

    /* Profiling counters are only valid once the kernel command is complete. */
    ret = clWaitForEvents(1, &prof_event);
    if (ret == CL_SUCCESS) {
        ret = clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ev_start_time, NULL);
    }
    if (ret == CL_SUCCESS) {
        ret = clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);
    }
    if (ret != CL_SUCCESS) {
        printf("Failed to query kernel profiling info.\n");
        exit(1);
    }
    run_time_gpu += (ev_end_time - ev_start_time);

    /* A new event is created every iteration; release it to avoid leaking it. */
    clReleaseEvent(prof_event);

    for (int i = 0; i < N; i++) {
        printf("%f, ", C[i]);
    }
}
printf("\n");

/* The average is a floating-point value, so it must be printed with %f, not %lu. */
double p = (double)run_time_gpu / (double)b;
printf("\nAverage time in nanoseconds = %f\n", p);

1 个答案:

答案 0 :(得分:0)

您的OpenCL示例有误。请先尝试编写一个不带循环的普通程序。可以参考这个示例(example)。

我以这种方式完成了持续时间的计算:

/**
 * Return the execution time of the command associated with `event`,
 * in milliseconds.
 *
 * Waits for the command to complete before reading its profiling counters,
 * so it is safe to call right after a non-blocking enqueue.
 *
 * @param event  Event returned by an enqueue call.
 * @return (END - START) converted from nanoseconds to milliseconds,
 *         or -1.0 if waiting on the event or querying its profiling
 *         info fails (e.g. profiling is not enabled on the queue).
 */
double getDuration(cl_event event)
{
  cl_ulong start_time = 0, end_time = 0;

  /* Profiling info is unavailable until the command has finished. */
  if (clWaitForEvents(1, &event) != CL_SUCCESS) {
    return -1.0;
  }
  if (clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start_time, NULL) != CL_SUCCESS) {
    return -1.0;
  }
  if (clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end_time, NULL) != CL_SUCCESS) {
    return -1.0;
  }

  /* Counters are in nanoseconds; 1e-6 converts to milliseconds. */
  return (double)(end_time - start_time) * 1e-6;
}


cl_event timer;

int ret = clEnqueueNDRangeKernel(cq, kernel, 1, p_global_work_offset, &global_work_size, &local_work_size, 0, NULL, &timer);

printf("T:%fms", getDuration(timer));