我正在尝试使用clock_gettime函数获取以下代码的运行时。但是当我运行代码时,每次运行时我都会收到0.0000的时间。我也单独输出了开始和停止时间,我收到完全相同的答案。
struct timespec start, stop;
double accum;
if( clock_gettime( CLOCK_REALTIME, &start) == -1 ) {
perror( "clock gettime" );
exit( EXIT_FAILURE );
}
int src = 1, final_ret = 0;
for (int t = 0; t < rows - 1; t += pyramid_height)
{
int temp = src;
src = final_ret;
final_ret = temp;
// Calculate this for the kernel argument...
int arg0 = MIN(pyramid_height, rows-t-1);
int theHalo = HALO;
// Set the kernel arguments.
clSetKernelArg(cl.kernel(kn), 0, sizeof(cl_int), (void*) &arg0);
clSetKernelArg(cl.kernel(kn), 1, sizeof(cl_mem), (void*) &d_gpuWall);
clSetKernelArg(cl.kernel(kn), 2, sizeof(cl_mem), (void*) &d_gpuResult[src]);
clSetKernelArg(cl.kernel(kn), 3, sizeof(cl_mem), (void*) &d_gpuResult[final_ret]);
clSetKernelArg(cl.kernel(kn), 4, sizeof(cl_int), (void*) &cols);
clSetKernelArg(cl.kernel(kn), 5, sizeof(cl_int), (void*) &rows);
clSetKernelArg(cl.kernel(kn), 6, sizeof(cl_int), (void*) &t);
clSetKernelArg(cl.kernel(kn), 7, sizeof(cl_int), (void*) &borderCols);
clSetKernelArg(cl.kernel(kn), 8, sizeof(cl_int), (void*) &theHalo);
clSetKernelArg(cl.kernel(kn), 9, sizeof(cl_int) * (cl.localSize()), 0);
clSetKernelArg(cl.kernel(kn), 10, sizeof(cl_int) * (cl.localSize()), 0);
clSetKernelArg(cl.kernel(kn), 11, sizeof(cl_mem), (void*) &d_outputBuffer);
cl.launch(kn);
}
if( clock_gettime( CLOCK_REALTIME, &stop) == -1 ) {
perror( "clock gettime" );
exit( EXIT_FAILURE );
}
printf( "%lf\n", stop.tv_sec );
printf( "%lf\n", start.tv_sec );
accum = ( stop.tv_sec - start.tv_sec )
+ ( stop.tv_nsec - start.tv_nsec )
/ BILLION;
printf( "%lf\n", accum );
非常感谢任何关于我做错事的建议
答案 0 :(得分:3)
timespec::tv_nsec
是整数类型,因此如果BILLION
也是整数类型,那么:
( stop.tv_nsec - start.tv_nsec )
/ BILLION;
将截断为零。如果tv_sec
值相同,则会得到零差异。
尝试:
double( stop.tv_nsec - start.tv_nsec )
/ BILLION;
这将使用double
类型执行除法。