我正在尝试使用openMP和openCV在c ++中实现图像形态学运算符。算法工作正常但是,当我得到VTune的分析结果时,我发现并行化方法比顺序方法花费的时间更多,并且它是由 .at() openCv函数引起的。为什么?我怎么能解决它?
这是我的代码:
Mat Morph_op_manager::compute_morph_base_op(Mat image, bool parallel, int type) {
//strel attribute
int strel_rows = 5;
int strel_cols = 5;
//strel center coordinate
int cr = 2;
int cc = 2;
//number of row and column after strel center
int nrac = strel_rows - cr ;
int ncac = strel_cols - cr ;
//strel init
Mat strel(strel_rows,strel_cols,CV_8UC1, Scalar(0));
Mat op_result = image.clone();
if (parallel == false)
omp_set_num_threads(1); // Use 1 threads for all consecutive parallel regions
//parallelized nested loop
#pragma omp parallel for collapse(4)
for (int i= cr ; i<image.rows-nrac; i++)
for (int j = cc; j < image.cols -ncac; j++) {
for (int m = 0; m < strel_rows; m++)
for (int n = 0; n < strel_cols; n++) {
// if type = 0 -> erode
if (type == 0){
if (image.at<uchar>(i-(strel_rows-m),j-(strel_cols-n)) != strel.at<uchar>(m,n)){
op_result.at<uchar>(i, j) = 255;
}
}
// if type == 0 -> dilate
if (type == 1){
if (image.at<uchar>(i-(strel_rows-m),j-(strel_cols-n)) == strel.at<uchar>(m,n)){
op_result.at<uchar>(i, j) = 0;
}
}
}
}
}
这是分析结果:
SPEED-UP:
而是使用**。at()**方法我使用指针访问像素矩阵,并按照下面的代码中的描述改变我的指令。
问题仍然存在:在我的分析日志 Mat ::。release()花费大量时间为什么?我怎么能解决它?
加速代码:
omp_set_num_threads(4);
double start_time = omp_get_wtime();
#pragma omp parallel for shared(strel,image,op_result,strel_el_count) private(i,j) schedule(dynamic) if(parallel == true)
for( i = cr; i < image.rows-nrac; i++)
{
op_result.addref();
uchar* opresult_ptr = op_result.ptr<uchar>(i);
for ( j = cc; j < image.cols-ncac; j++)
{
//type == 0 --> erode
if (type == 0 ){
if(is_fullfit(image,i,j,strel,strel_el_count,parallel)){
opresult_ptr[j] = 0;
}
else
opresult_ptr[j] = 255;
}
}
}
这是 fullfit函数
bool Morph_op_manager::is_fullfit(Mat image,int i,int j,Mat strel,int strel_counter,bool parallel){
int mask_counter = 0;
int ii=0;
int jj=0;
for ( ii = 0; ii <strel.rows ; ii++) {
uchar* strel_ptr = strel.ptr<uchar>(ii);
uchar* image_ptr = image.ptr<uchar>(i - (strel.rows - ii));
for ( jj = 0; jj <strel.cols ; ++jj) {
mask_counter += (int) image_ptr[j-(strel.cols-jj)];
}
}
return mask_counter == strel_counter;
}
cpu数据: 中央处理器 名称:第4代Intel(R)Core(TM)处理器系列 频率:2.4 GHz 逻辑CPU数量:4
这是我的日志: