我正在尝试测量在循环中执行内核所花费的时间。问题是我得到的并不是单一的时间值,而是出现了5个特定的时间值。以下是代码,请告诉我哪里做错了。谢谢。
/*
 * Run the kernel b times and print the average per-launch execution time,
 * taken from the OpenCL profiling counters of each launch's event.
 *
 * NOTE(review): the profiling queries only succeed when command_queue was
 * created with CL_QUEUE_PROFILING_ENABLE; queue creation is not visible
 * here, so confirm it.
 * NOTE(review): run_time_gpu is assumed to be zero-initialized before this
 * snippet; confirm.
 */
int b = 100;
int a;
cl_event prof_event;
for (a = 0; a < b; a++)
{
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, globalws, NULL, 0, NULL, &prof_event);
    if (ret != CL_SUCCESS) {
        printf("Failed to enqueueNDRangeKernel.\n");
        exit(1);
    }

    /* Read bufferC back into C, then copy C into bufferY; both calls are blocking. */
    ret = clEnqueueReadBuffer(command_queue, bufferC, CL_TRUE, 0, N * sizeof(float), (void *)C, 0, NULL, NULL);
    ret |= clEnqueueWriteBuffer(command_queue, bufferY, CL_TRUE, 0, 1 * N * sizeof(float), (void *)C, 0, NULL, NULL);
    ret |= clFinish(command_queue);
    if (ret != CL_SUCCESS) {
        printf("Failed to transfer buffers.\n");
        exit(1);
    }

    /* The event must be complete before its profiling counters are valid. */
    ret = clWaitForEvents(1, &prof_event);
    ret |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ev_start_time, NULL);
    ret |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);
    if (ret != CL_SUCCESS) {
        printf("Failed to read profiling info.\n");
        exit(1);
    }
    run_time_gpu += (ev_end_time - ev_start_time);

    /* Each enqueue hands back a new event; release it so it is not leaked. */
    clReleaseEvent(prof_event);

    for (int i = 0; i < N; i++) {
        printf("%f, ", C[i]);
    }
}
printf("\n");
/* Average in double: float cannot represent large nanosecond totals exactly. */
double p = (double)run_time_gpu / (double)b;
/* %f matches the floating-point argument; the original %lu was a mismatch. */
printf("\nAverage time in nanoseconds = %f\n", p);
答案 0 :(得分:0)
您的OpenCL示例写法有问题。请先尝试编写一个不带循环的普通程序。可以参考这个示例(example)。
我以这种方式完成了持续时间的计算:
/**
 * Return the execution time of the command associated with an event.
 *
 * Waits for the event to complete, then reads its START and END profiling
 * counters and converts the difference to milliseconds.
 *
 * NOTE(review): requires the queue that produced the event to have been
 * created with CL_QUEUE_PROFILING_ENABLE; not visible here, confirm.
 *
 * @param event  Event returned by an enqueue call.
 * @return Elapsed time in milliseconds, or -1.0 if waiting on the event or
 *         querying its profiling info fails.
 */
double getDuration(cl_event event)
{
    cl_ulong start_time = 0;
    cl_ulong end_time = 0;

    /* Profiling counters are only available once the command has completed. */
    if (clWaitForEvents(1, &event) != CL_SUCCESS) {
        return -1.0;
    }
    if (clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start_time, NULL) != CL_SUCCESS) {
        return -1.0;
    }
    if (clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end_time, NULL) != CL_SUCCESS) {
        return -1.0;
    }

    /* Counters are reported in nanoseconds; 1e-6 converts to milliseconds. */
    return (double)(end_time - start_time) * 1e-6;
}
cl_event timer;
int ret = clEnqueueNDRangeKernel(cq, kernel, 1, p_global_work_offset, &global_work_size, &local_work_size, 0, NULL, &timer);
printf("T:%fms", getDuration(timer));