我有一个通过以下方式创建的OpenCL缓冲区:
// Creates a read/write OpenCL buffer of `size` bytes in `_context`; CL_MEM_ALLOC_HOST_PTR
// asks the implementation to allocate it from host-accessible memory.
return cl::Buffer(_context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size);
我将数据写入该缓冲区,并希望稍后在内核中使用它。但我遇到了一个奇怪的现象:我的内核无法使用该缓冲区。只有当我随意地添加如下调用
// Constructs a second, otherwise unused BufferContainer; nothing is ever written to it.
// NOTE(review): the last argument (3) is presumably the bytes per element - confirm against BufferContainer.
BufferContainer blah(oclEnvironment, cv::Size(width, height), 3);
(它会调用上述函数,再次创建一个相同大小的缓冲区)时,内核才开始工作。我根本没有调用 blah.Write(...)。内核似乎能够处理我写入第一个缓冲区的数据。但是,如果我把创建“blah”缓冲区的那一行注释掉,它就不再起作用了。
编辑:两个缓冲区的大小完全相同。
编辑2:这是否与命令队列以及其中对象的顺序有关?
基本上,我尝试运行一个内核对图像做归约(reduce),以找到 HSV 中 V 分量的最大值。然后,在该内核完成并返回最大值之后,我再运行下一个内核,并把找到的最大值作为它的一个参数。因此调用链如下:
// Run the reduction first; maxV is the maximum it returns (last argument: sync = true).
float maxV = _maxValueReduce->GetValueMaximum(oclEnvironment, fiBuffer, width, height, true);
// Start setting the parameters of the next kernel (other arguments elided below).
...
// Argument 8 of the next kernel is the maximum found above.
_kernel.setArg(8, maxV);
oclEnvironment._commandQueue.enqueueNDRangeKernel(_kernel, cl::NullRange, global, local);
其中,GetValueMaximum(...) 自身会启动一个归约内核来找到该最大值。
编辑3:
/**
 * Launches the reduce kernel on `source` and returns the largest of the
 * per-work-group results, finishing the reduction on the host.
 *
 * The kernel is launched over source._size with 16 x 16 work-groups; each
 * work-group is expected to write one float into the result buffer, which is
 * then read back and reduced with std::max_element.
 *
 * @param oclEnvironment  Environment providing the command queue.
 * @param source          Input buffer; its _size defines the launch grid.
 * @param width           Passed to the kernel as argument 2.
 * @param height          Passed to the kernel as argument 3.
 * @param sync            If true, finish() the queue before reading back.
 * @return The maximum of all per-work-group results.
 * @throws std::exception if the launch grid is empty, the kernel cannot be
 *         enqueued, or the result buffer cannot be read back.
 */
float OclMaxValueReduce::GetValueMaximum(OclEnvironment& oclEnvironment,
BufferContainer& source, int width, int height, const bool sync)
{
    // Intel HD 530 can have a max. workgroup size of 256, hence 16 x 16.
    const int dim1 = 16;
    const int dim2 = 16;
    const cl::NDRange local(dim1, dim2, 1);
    // NOTE(review): unless non-uniform work-groups are available, OpenCL requires the
    // global size to be a multiple of the local size. Padding it here would only be
    // safe if the kernel bounds-checks against width/height, which is not visible
    // from this file, so the launch size is left unchanged and failures are reported.
    const cl::NDRange global(source._size.width, source._size.height, 1);

    // A 2D launch creates ceil(w / 16) * ceil(h / 16) work-groups. This is NOT the
    // same as ceil(w * h / 256) when a dimension is not a multiple of 16
    // (17 x 17 -> 4 groups, not 2), so count per dimension, in integer arithmetic,
    // and from the same size that defines the launch grid.
    const int groupsX = (source._size.width + dim1 - 1) / dim1;
    const int groupsY = (source._size.height + dim2 - 1) / dim2;
    const int numberOfWorkgroups = groupsX * groupsY;
    if (groupsX <= 0 || groupsY <= 0)
        throw std::exception("OclMaxValueReduce::GetValueMaximum: empty source, nothing to reduce!", CL_INVALID_VALUE);

    // Each workgroup reduces its data to a single element; these elements are
    // reduced on the host in the final step. Create the buffer for them first.
    BufferContainer result(oclEnvironment, cv::Size(numberOfWorkgroups, 1), sizeof(float));

    // Set the kernel arguments.
    _kernel.setArg(0, source.GetOclBuffer());
    _kernel.setArg(1, result.GetOclBuffer());
    _kernel.setArg(2, width);
    _kernel.setArg(3, height);

    // Do not silently continue after a failed launch: the result buffer would
    // then be read back without ever having been written.
    const cl_int launchStatus = oclEnvironment._commandQueue.enqueueNDRangeKernel(_kernel, cl::NullRange, global, local);
    if (launchStatus != CL_SUCCESS)
        throw std::exception("OclMaxValueReduce::GetValueMaximum: OCL could not enqueue the reduce kernel!", launchStatus);
    if (sync)
        oclEnvironment._commandQueue.finish();

    // Retrieve the per-work-group results straight into a vector so nothing leaks
    // if ReadBuffer throws.
    std::vector<float> partialMaxima(static_cast<size_t>(numberOfWorkgroups));
    ReadBuffer(oclEnvironment, result.GetOclBuffer(), partialMaxima.data(), partialMaxima.size());

    // Final reduction step on the host; the vector is non-empty (checked above).
    return *std::max_element(partialMaxima.begin(), partialMaxima.end());
}
该函数会调用下面的 ReadBuffer 来读取缓冲区:
/**
 * Reads a float array from an OpenCL buffer into host memory.
 *
 * Maps the first `size` floats of `resultBuffer` for reading (blocking map),
 * copies them to `dest`, then unmaps the buffer and waits for the unmap event.
 *
 * @param oclEnvironment  Environment providing the command queue.
 *                        NOTE(review): taken by value, unlike GetValueMaximum; left
 *                        unchanged so the definition keeps matching its declaration.
 * @param resultBuffer    Buffer to read from.
 * @param dest            Destination array with room for at least `size` floats.
 * @param size            Number of floats to copy; 0 is a no-op.
 * @throws std::exception if the buffer cannot be mapped or unmapped.
 */
void OclMaxValueReduce::ReadBuffer(OclEnvironment oclEnvironment, cl::Buffer
&resultBuffer, float* dest, const size_t size) {
    // Mapping a zero-sized region is an error in OpenCL; there is nothing to copy anyway.
    if (size == 0)
        return;

    cl_int errcode = CL_SUCCESS;
    float* resultData = static_cast<float*>(oclEnvironment._commandQueue.enqueueMapBuffer(resultBuffer, CL_TRUE, CL_MAP_READ, 0, size * sizeof(float), 0, 0, &errcode));
    // The (const char*, int) constructor of std::exception is an MSVC extension that
    // stores the pointer without copying the text, so the message must be a string
    // literal; a temporary std::string would leave what() dangling.
    if (errcode != CL_SUCCESS || resultData == nullptr)
        throw std::exception("OclMaxValueReduce::ReadBuffer: OCL could not map Buffer!", errcode);

    memcpy(dest, resultData, size * sizeof(float));

    // Unmap the buffer and wait until the unmap has completed.
    cl::Event testEvent;
    const cl_int unmapStatus = oclEnvironment._commandQueue.enqueueUnmapMemObject(resultBuffer, resultData, NULL, &testEvent);
    if (unmapStatus != CL_SUCCESS)
        throw std::exception("OclMaxValueReduce::ReadBuffer: OCL could not unmap Buffer!", unmapStatus);
    testEvent.wait();
}