我正在尝试检测闪烁的像素。我最初用 C++ 编写了代码,但后来意识到 CPU 并不适合这项任务,于是找到了 OpenCL 库。我以前从未用过它,而且也没有找到关于 OpenCL 的好文档。
OpenCLHelper.cpp
#include <stdlib.h>

#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <CL/cl.hpp>
/// Compiles an OpenCL program from a source file for the first GPU device of
/// the first available platform.
///
/// @param fileName Path of the OpenCL C source file to compile.
/// @return The program object after `build("-cl-std=CL1.2")` has been called.
///         The numeric build status is printed to stdout; when the build
///         fails the compiler log is additionally printed to stderr and the
///         (unbuilt) program is still returned, as before.
/// @throws std::runtime_error if no platform or no GPU device is available,
///         or if the source file cannot be opened.
cl::Program CreateProgram(const std::string& fileName) {
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.empty()) {
        // front() on an empty vector is undefined behaviour.
        throw std::runtime_error("CreateProgram: no OpenCL platform found");
    }
    auto platform = platforms.front();

    std::vector<cl::Device> devices;
    platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
    if (devices.empty()) {
        throw std::runtime_error("CreateProgram: no GPU device on the first OpenCL platform");
    }
    auto device = devices.front();

    std::ifstream file(fileName);
    if (!file) {
        throw std::runtime_error("CreateProgram: cannot open kernel source file " + fileName);
    }
    const std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());

    // The length handed to OpenCL is the number of characters in the source;
    // it must not include the terminating NUL.
    cl::Program::Sources sources(1, std::make_pair(src.c_str(), src.length()));
    cl::Context context(device);
    cl::Program program(context, sources);

    const cl_int buildStatus = program.build("-cl-std=CL1.2");
    std::cout << buildStatus << std::endl;
    if (buildStatus != CL_SUCCESS) {
        // A bare error code says nothing about what went wrong in the kernel
        // source; the build log does.
        std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
    }
    return program;
}
main.cpp(不是文件的整个代码)
// Recover the OpenCL context from `program` and take the first device that
// the context reports.
cl::Context context(program.getInfo<CL_PROGRAM_CONTEXT>());
vector<cl::Device> devices(context.getInfo<CL_CONTEXT_DEVICES>());
cl::Device device(devices.front());
unsigned char* shootFrame(unsigned char *data) {
unsigned char* frequencyImage = new unsigned char[pixelsPerFrame];
strcopy(data, frequencyImage);
cl_int err = 0;
cl::Buffer inBuf(context, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR, sizeof(unsigned char) * pixelsPerFrame * equalxFramesAtTheSameTime, lastFrames, &err);
cout << err << endl;
cl::Buffer outBuf(context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, sizeof(unsigned char) * pixelsPerFrame, nullptr, &err);
cl::Buffer var1(context, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR, sizeof(int), &isLightOn, &err);cout << err << endl;
//I creates variables, because I can't use #define in there
int equalxFramesAtTheSameTime2 = equalxFramesAtTheSameTime;
cl::Buffer var2(context, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR, sizeof(int), &equalxFramesAtTheSameTime2, &err);cout << err << endl;
int thresholdPixel2 = thresholdPixel;
cl::Buffer var3(context, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR, sizeof(int), &thresholdPixel2, &err);cout << err << endl;
int ppf = pixelsPerFrame;
cl::Buffer var4(context, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR, sizeof(int), &ppf, &err);cout << err << endl;
cl::Buffer var5(context, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR, sizeof(int), ¤tFrameID, &err);cout << err << endl;
cl::Kernel kernel(program, "ProcessImage"); cout << err << endl;
err = kernel.setArg(0, var1); cout << err << endl;
err = kernel.setArg(1, var2); cout << err << endl;
err = kernel.setArg(2, var3); cout << err << endl;
err = kernel.setArg(3, var4); cout << err << endl;
err = kernel.setArg(4, var5); cout << err << endl;
err = kernel.setArg(5, inBuf); cout << err << endl;
err = kernel.setArg(6, outBuf); cout << err << endl;
cl::CommandQueue queue(context, device);
err = queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(pixelsPerFrame)); cout << err << endl;
err = queue.enqueueReadBuffer(outBuf, CL_FALSE, 0, sizeof(unsigned char) * pixelsPerFrame, done); cout << err << endl;
cout << "done: " << queue.finish() << endl;
return getXYfromRawImage(done, frequencyImage, updown, leftright);
}
ProcessImage.cl
// Flags, for one pixel, whether its brightness fails to alternate across the
// buffered frames.
//
// Each work-item handles the pixel at get_global_id(0). Starting at frame
// `currentFrameID` and wrapping around after `frameSize` frames, consecutive
// samples of that pixel are compared: the value must alternately drop by more
// than `thresholdPixel` and rise by more than `thresholdPixel`. `isLightOn`
// selects which direction is expected first. The first comparison that does
// not match marks the pixel and stops the scan.
//
// Parameters:
//   isLightOn       non-zero: the first expected change is a drop in value
//   frameSize       number of frames stored in lastFrames
//   thresholdPixel  minimum difference that counts as a change
//   pixelsPerFrame  number of pixels (bytes) per frame in lastFrames
//   currentFrameID  index of the frame the scan starts at
//   lastFrames      frameSize * pixelsPerFrame bytes, one byte per pixel
//   outData         pixelsPerFrame bytes; 1 = pattern broken, 0 = pattern held
//
// NOTE: the scalar parameters are passed by value. The host must set them
// with the plain integer (e.g. kernel.setArg(1, (cl_int)frames)), not with a
// buffer object, otherwise the loop bounds below are garbage.
__kernel void ProcessImage(const int isLightOn, const int frameSize, const int thresholdPixel, const int pixelsPerFrame, const int currentFrameID, __global unsigned char* lastFrames, __global unsigned char* outData) {
    const int pixel = get_global_id(0);
    int isBegin = 1;
    bool mustBrightNow = !isLightOn;
    int lastPixel = 0;
    int isWrongPixel = 0;

    for (int i = currentFrameID; i < frameSize + currentFrameID; i++) {
        // Wrap the frame index around the end of the frame ring.
        int i2 = i;
        if (i >= frameSize) {
            i2 = i2 - frameSize;
        }
        int id = (i2 * pixelsPerFrame) + pixel;

        if (isBegin == 1) {
            // First sample only seeds the comparison.
            lastPixel = (int) lastFrames[id];
            isBegin = 0;
        } else {
            int currentPixel = (int) lastFrames[id];
            if (mustBrightNow == false) {
                // Expecting a drop of more than thresholdPixel.
                if (currentPixel + thresholdPixel < lastPixel) {
                    mustBrightNow = true;
                } else {
                    // Pattern broken. This used to store 0 as a workaround
                    // for a reported freeze, which made the output always 0.
                    isWrongPixel = 1;
                    break;
                }
            } else {
                // Expecting a rise of more than thresholdPixel.
                if (currentPixel - thresholdPixel > lastPixel) {
                    mustBrightNow = false;
                } else {
                    isWrongPixel = 1;
                    break;
                }
            }
            lastPixel = currentPixel;
        }
    }

    outData[pixel] = (uchar) (isWrongPixel == 0 ? 0 : 1);
}
所有 cout 输出的都是 0,因此没有明显的错误。
我知道,代码的某些部分没有经过优化,但是应该可以使用。
如果您想了解:unsigned char* lastFrames 按每个像素一个字符(单色)的方式存储,因此它的大小约为 2000 万(宽 × 高 × 之前的帧数)。它包含多帧图像,这样我就可以在 OpenCL 中比较不同的帧。
那么问题会出在哪里?当我把 isWrongPixel 设为 1 或 true 时程序会冻结;设为 0 或 false 时可以运行,但我需要一个布尔值。我到底做错了什么?
我知道我的语法并不完全正确。
预先感谢
答案 0 :(得分:0)
以下回答基于评论中的讨论。
以下是使用图片的示例:
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <CL/cl.hpp>

// Host-side example: runs the image-based "ProcessImage" kernel on a stack of
// 2D frames stored in a 3D image and reads back a 2D result image.

// List every platform and device, and pick a device to run on.
std::vector<cl::Platform> clPlatforms;
cl::Platform::get(&clPlatforms);
// TODO Set correctly
cl::Device chosenDevice;
bool first = true;
for (auto &&platform : clPlatforms) {
    std::vector<cl::Device> clDevices;
    platform.getDevices(CL_DEVICE_TYPE_ALL, &clDevices);
    // Placeholder selection: the first device of the first platform that has
    // one. A platform may report no devices, so never index blindly.
    if (first && !clDevices.empty()) { // REMOVE
        chosenDevice = clDevices[0];
        first = false;
    }
    std::cout << platform.getInfo<CL_PLATFORM_NAME>() << '\n';
    for (auto &&device : clDevices) {
        std::cout << device.getInfo<CL_DEVICE_NAME>() << '\n';
    }
}
if (first) {
    throw std::runtime_error("No OpenCL device found");
}
cl::Context context{chosenDevice};

// Possible values
// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/cl_image_format.html
cl::ImageFormat format{CL_R, CL_UNSIGNED_INT8};
std::size_t imageWidth = 640;
std::size_t imageHeight = 480;
std::size_t numFrames = 128;

// Fill as sequences of rows for each 2D frame, i.e. the byte for (x, y, frame)
// lives at (frame * imageHeight + y) * imageWidth + x.
// A vector owns the storage, so nothing has to be deleted by hand.
std::vector<std::uint8_t> input(imageWidth * imageHeight * numFrames);
std::size_t i = 0;
for (std::size_t frameI = 0; frameI < numFrames; ++frameI) {
    for (std::size_t y = 0; y < imageHeight; ++y) {
        for (std::size_t x = 0; x < imageWidth; ++x) {
            input[i++] = 0; // INIT
        }
    }
}

// Zeroes specify data format, see
// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clCreateImage3D.html
// Note that images cannot be both read and write
cl::Image3D inImage{context,
                    CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR,
                    format,
                    imageWidth,
                    imageHeight,
                    numFrames,
                    0,
                    0,
                    input.data(),
                    nullptr};
cl::Image2D outImage{context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY,
                     format, imageWidth, imageHeight};

std::string source = "PASTE SOURCE HERE";
cl::Program program(context, source);
if (program.build("-cl-std=CL1.2") != CL_SUCCESS) {
    // Show the compiler output instead of failing silently later on.
    std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(chosenDevice) << '\n';
}

// Scalar kernel parameters are passed by value with the exact type the kernel
// declares (int); no buffer objects are needed for them.
cl::Kernel kernel(program, "ProcessImage");
kernel.setArg(0, static_cast<cl_int>(0));
kernel.setArg(1, static_cast<cl_int>(numFrames));
int thresholdPixel = 10; // SET
kernel.setArg(2, static_cast<cl_int>(thresholdPixel));
kernel.setArg(3, static_cast<cl_int>(imageWidth * imageHeight));
int currentFrameID = 12; // SET
kernel.setArg(4, static_cast<cl_int>(currentFrameID));
kernel.setArg(5, inImage);
kernel.setArg(6, outImage);

// One work-item per pixel, arranged as a 2D range.
cl::CommandQueue queue(context, chosenDevice);
queue.enqueueNDRangeKernel(kernel, cl::NullRange,
                           cl::NDRange(imageWidth, imageHeight));

std::vector<std::uint8_t> output(imageWidth * imageHeight);
// See
// https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clEnqueueReadImage.html
cl::size_t<3> origin; // zero-initialised by its default constructor
cl::size_t<3> region;
region[0] = imageWidth;
region[1] = imageHeight;
region[2] = 1;
// Might as well block if the next call would be clFinish anyway.
queue.enqueueReadImage(outImage, CL_TRUE, origin, region, 0, 0,
                       output.data());
内核源代码:
// Image-based variant: each work-item handles the pixel at
// (get_global_id(0), get_global_id(1)).
//
// Starting at slice `currentFrameID` of the 3D image and wrapping around after
// `frameSize` slices, consecutive samples of the pixel are compared. The value
// has to alternately fall by more than `thresholdPixel` and rise by more than
// `thresholdPixel`; `isLightOn` decides which direction is expected first.
// The x component written to outData is 1 when a comparison fails and 0 when
// every comparison matched. `pixelsPerFrame` is not used by this variant.
__kernel void ProcessImage(const int isLightOn, const int frameSize, const int thresholdPixel, const int pixelsPerFrame, const int currentFrameID, read_only image3d_t lastFrames, write_only image2d_t outData) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    bool expectRise = !isLightOn;
    int previous = 0;
    int flagged = 0;

    for (int step = 0; step < frameSize; ++step) {
        // Slice index, wrapped back to the start of the ring when needed.
        int slice = currentFrameID + step;
        if (slice >= frameSize) {
            slice -= frameSize;
        }

        const int sample = (int)read_imageui(lastFrames, (int4)(col, row, slice, 0)).x;

        // The first sample only seeds the comparison.
        if (step == 0) {
            previous = sample;
            continue;
        }

        const bool changedAsExpected = expectRise
            ? (sample - thresholdPixel > previous)
            : (sample + thresholdPixel < previous);
        if (!changedAsExpected) {
            flagged = 1;
            break;
        }

        // The next comparison expects the opposite direction.
        expectRise = !expectRise;
        previous = sample;
    }

    write_imageui(outData, (int2)(col, row), (uint4)(flagged, 0, 0, 0));
}
我在我的 1050 Ti、英特尔 7700HQ 和英特尔 HD 630 上都能运行这段代码而不会冻结,希望您也可以 :)
我用图像替换了缓冲区,并把这个任务改成了“2D”,这体现在 enqueueNDRangeKernel 的调用中。内核执行的操作完全相同,但用图像来索引更加自然。我不确定您是否了解 OpenCL 可以有多个平台,这或许能让您用上 GPU:只要安装了最新的驱动程序,设备就会出现在平台和设备列表中,不需要其他任何东西。另外,无需为 const 变量创建缓冲区,只需在 kernel.setArg 模板函数中使用正确的类型即可。
所以尝试一下:)