我写了一个带有3个输入参数(inputOCL、comparisonValue、method)的函数(masking):
在我的例子中,我选择了 method = 1,它代表 CMP_GT,即逐元素测试 inputOCL > comparisonValue 是否成立。
该函数的目的是将 inputOCL 中所有不满足给定比较条件的元素清零。
下面是 masking 函数:
// Masks inputOCL against a scalar: the input is compared element-wise with
// comparisonValue, and the comparison result is multiplied back into inputOCL.
//
// inputOCL         matrix that is compared and then overwritten with the result
// comparisonValue  scalar every element is compared against
// method           comparison operator code passed straight to cv::ocl::compare;
//                  0-->5 correspond to (==, >, >=, <, <=, !=), e.g. 1 selects >
void masking(cv::ocl::oclMat inputOCL, double comparisonValue, int method)
{
    // Constant matrix with the input's size and type, holding the comparison value.
    cv::ocl::oclMat threshold(inputOCL.size(), inputOCL.type());
    threshold.setTo(cv::Scalar(comparisonValue));

    // Element-wise comparison, then bring the result to the input's type so the
    // two matrices can be multiplied together.
    cv::ocl::oclMat mask;
    cv::ocl::compare(inputOCL, threshold, mask, method);
    mask.convertTo(mask, inputOCL.type());

    // Apply the mask, then scale by 1/255.
    // NOTE(review): the 1/255 factor presumably cancels a 255-valued "true" in
    // the comparison result -- confirm against the cv::ocl::compare documentation.
    cv::ocl::multiply(mask, inputOCL, inputOCL);
    const double rescale = 1 / 255.0;
    cv::ocl::multiply(rescale, inputOCL, inputOCL);
}
在对该函数计时时,我发现通过函数执行计算与在下面的代码中直接执行同样的计算,两者的运行时间存在很大差异:
int main(int argc, char** argv){
double value1 = 1.23456789012345;
double value2 = 1.23456789012344;
// initialize matrix
cv::Mat I(5000, 5000, CV_64F, cv::Scalar(value1));
// copy input to GPU
cv::ocl::oclMat inputOCL(I);
int method = 1;
static double start_TIMER;
// computation done in function
start_TIMER = cv::getTickCount();
masking(inputOCL, value2, method);
std::cout << "\nFunction runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
// direct computation
start_TIMER = cv::getTickCount();
cv::ocl::oclMat valueOCL(inputOCL.size(), inputOCL.type());
valueOCL.setTo(cv::Scalar(value2));
cv::ocl::oclMat logicalOCL;
cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
logicalOCL.convertTo(logicalOCL, inputOCL.type());
cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
cv::ocl::multiply(1 / 255.0, inputOCL, inputOCL);
std::cout << "\nDirect runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
从这张截图中可以看到运行时间:
为什么运行时间会存在这么大的差异?
答案 0 :(得分:0)
我要感谢 asarsakov(他让我注意到释放 oclMat 的问题)和 DarkZeros(他指出我忘了在函数中释放第二个临时 oclMat)。
但是,这并不是完整的解决方案。看来,要让“直接”计算和“函数”计算得到相同的结果,唯一的办法是以 cv::ocl::oclMat&(按引用)而不是 cv::ocl::oclMat(按值)的方式传递 oclMat。
请参阅以下代码(完整代码,包括函数在内),这是能产生相同结果的最终解决方案。通过更改 main 开头的两个布尔变量,可以选择计算方式(直接计算或通过函数计算),并控制是否在计时区间内释放临时 oclMat。
#include "opencv2/ocl/ocl.hpp"
#include <conio.h>
// Masks inputOCL against a scalar using caller-provided work matrices: the
// input is compared element-wise with comparisonValue, and the comparison
// result is multiplied back into inputOCL.
//
// inputOCL         matrix that is compared and then overwritten with the result
// valueOCL         work matrix, filled here with comparisonValue
// logicalOCL       work matrix receiving the comparison result
// comparisonValue  scalar every element is compared against
// method           comparison operator code passed straight to cv::ocl::compare;
//                  0-->5 correspond to (==, >, >=, <, <=, !=)
void masking(cv::ocl::oclMat &inputOCL, cv::ocl::oclMat &valueOCL, cv::ocl::oclMat &logicalOCL,
             double comparisonValue, int method)
{
    // Fill the caller's scratch matrix with the comparison value.
    const cv::Scalar fill(comparisonValue);
    valueOCL.setTo(fill);

    // Element-wise comparison, then bring the result to the input's type.
    cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
    const int inputType = inputOCL.type();
    logicalOCL.convertTo(logicalOCL, inputType);

    // Apply the mask, then scale by 1/255.
    // NOTE(review): the 1/255 factor presumably cancels a 255-valued "true" in
    // the comparison result -- confirm against the cv::ocl::compare documentation.
    cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
    const double rescale = 1 / 255.0;
    cv::ocl::multiply(rescale, inputOCL, inputOCL);
}
int main(int argc, char** argv){
bool direct = 1; // 1 for direct, 0 for function
bool release = 1; // 1 with releasing temporary oclMat, 0 without releasing them
// initialize data
int method = 1;
static double start_TIMER;
double value1 = 1.23456789012345;
double value2 = 1.23456789012344;
cv::Mat I(5000, 5000, CV_64F, cv::Scalar(value1));
if (direct){
// direct computation
cv::ocl::oclMat inputOCL1(I);
cv::ocl::oclMat valueOCL1(inputOCL1.size(), inputOCL1.type());
cv::ocl::oclMat logicalOCL1;
start_TIMER = cv::getTickCount();
valueOCL1.setTo(cv::Scalar(value2));
cv::ocl::compare(inputOCL1, valueOCL1, logicalOCL1, method);
logicalOCL1.convertTo(logicalOCL1, inputOCL1.type());
cv::ocl::multiply(logicalOCL1, inputOCL1, inputOCL1);
cv::ocl::multiply(1 / 255.0, inputOCL1, inputOCL1);
if (release){ valueOCL1.release(); logicalOCL1.release(); }
std::cout << "\nDirect runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
if (!direct){
// computation done in function
cv::ocl::oclMat inputOCL2(I);
cv::ocl::oclMat valueOCL2(inputOCL2.size(), inputOCL2.type());
cv::ocl::oclMat logicalOCL2;
start_TIMER = cv::getTickCount();
masking(inputOCL2, valueOCL2, logicalOCL2, value2, method);
if (release){ valueOCL2.release(); logicalOCL2.release(); }
std::cout << "\nFunction runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
printf("\nPress any key to exit...");
_getch();
return 0;
}