我正在尝试测量将部分数据(其中一个缓冲区的内容)传输到GPU所需的时间。
我试着用这个:
// Create the command queue with CL_QUEUE_PROFILING_ENABLE so that events
// produced by commands on this queue can be queried for profiling timestamps.
cl_command_queue queueGPU = clCreateCommandQueue(GPUcontext, GPUdeviceIds[0], CL_QUEUE_PROFILING_ENABLE, &error);
// First write: dataSize bytes from loadedData into the `data` buffer.
// CL_TRUE is passed as the blocking_write argument; transfer1 receives the event.
cl_event transfer1;
clEnqueueWriteBuffer(queueGPU, data, CL_TRUE, 0, dataSize, loadedData, 0, nullptr, &transfer1);
// Wait for the command behind transfer1 to finish.
clWaitForEvents(1, &transfer1);
// Second write: sizeof(int) * queryCount bytes from inputData into the
// `indices` buffer, tracked by its own event transfer2.
cl_event transfer2;
clEnqueueWriteBuffer (queueGPU, indices, CL_TRUE, 0, sizeof(int) * queryCount, inputData, 0, nullptr, &transfer2);
clWaitForEvents(1, &transfer2);
// NOTE(review): start/end are declared unsigned long, but the queries below
// request sizeof (cl_ulong) bytes. If unsigned long is narrower than cl_ulong
// on the target platform this writes past the variables -- confirm.
unsigned long start = 0;
unsigned long end = 0;
// Only transfer2 is queried here, so start/end describe the second write
// alone; the timestamps of transfer1 are never read.
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &start, NULL);
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &end, NULL);
// Elapsed device time of the second write only (end minus start of transfer2).
unsigned long transferTime = end - start;
但它返回的时间与传输所有数据的时间完全相同。我做错了什么?
答案 0 :(得分:1)
时间之所以相同,是因为每次 clEnqueueWriteBuffer 调用都需要各自的事件,而每个事件只记录它对应的那一次传输。我之前测量"全部数据"的时间时查询的也是同一个事件,所以得到的其实只是一个缓冲区的传输时间。
这应该可以正常工作:
// Transfer time for both buffers
cl_event transfer1;
clEnqueueWriteBuffer(queueGPU, data, CL_TRUE, 0, dataSize, loadedData, 0, nullptr, &transfer1);
clWaitForEvents(1, &transfer1);
unsigned long start = 0;
unsigned long end = 0;
clGetEventProfilingInfo(transfer1, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &start, NULL);
clGetEventProfilingInfo(transfer1, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &end, NULL);
unsigned long transferTime1 = end - start;
cl_event transfer2;
clEnqueueWriteBuffer(queueGPU, indices, CL_TRUE, 0, sizeof(int) * queryCount, inputData, 0, nullptr, &transfer2);
clWaitForEvents(1, &transfer2);
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &start, NULL);
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &end, NULL);
unsigned long transferTime = end - start + transferTime1;