Question

我希望并行化以下代码，但我不熟悉 OpenMP，也没有编写并行代码的经验。

// Serial baseline: scan indices 0 .. descriptors_A.rows - 1 and append every
// match whose distance is below 3 * min_dist to good_matches. Because the loop
// is sequential, the kept matches stay in the same order as in matches_RM.
// NOTE(review): assumes matches_RM has at least descriptors_A.rows entries -- confirm.
std::vector<DMatch> good_matches;
for (int i = 0; i < descriptors_A.rows; i++) {
   if (matches_RM[i].distance < 3 * min_dist) {
      good_matches.push_back(matches_RM[i]);
   }
}

我试过了

// Attempt 1 (incorrect): good_matches is default-constructed and therefore
// empty, so the indexed assignment below writes outside the vector's storage,
// which is undefined behavior regardless of the OpenMP pragma.
std::vector<DMatch> good_matches;
#pragma omp parallel for
for (int i = 0; i < descriptors_A.rows; i++) {
   if (matches_RM[i].distance < 3 * min_dist) {
      // Out-of-bounds write: operator[] never grows the vector.
      good_matches[i] = matches_RM[i];
   }
}

和

// Attempt 2 (incorrect): temp is declared before the parallel loop, so under
// OpenMP's default data-sharing rules it is shared by all threads and the
// unsynchronized assignments to it form a data race. good_matches is also
// still empty, so indexing it is out of bounds.
// Note: the closing brace of the for loop is missing in the snippet as posted.
std::vector<DMatch> good_matches;
cv::DMatch temp;
#pragma omp parallel for
for (int i = 0; i < descriptors_A.rows; i++) {
   if (matches_RM[i].distance < 3 * min_dist) {
      // Every thread writes the same shared temp object here.
      temp = matches_RM[i];
      // Out-of-bounds write: good_matches has size 0.
      good_matches[i] = temp;
      // Variant also tried by the author; push_back on a shared vector from
      // several threads without synchronization is a data race as well.
      // AND ALSO good_matches.push_back(temp);
   }

我也试过

#omp parallel critical 
good_matches.push_back(matches_RM[i]);

这种写法可以正常工作，但并没有加快速度。也许这种 for 循环本身就无法加速，但如果可以的话就太好了。我还想加速下面这段代码：

// Collect matched point pairs: for each entry of good_matches, the .pt of the
// keypoint in keypoints_A selected by queryIdx is appended to obj, and the .pt
// of the keypoint in keypoints_B selected by trainIdx is appended to scene.
// Both vectors grow in lockstep, so obj[k] and scene[k] come from the same match.
std::vector<Point2f> obj, scene;
for (int i = 0; i < good_matches.size(); i++) {
   obj.push_back(keypoints_A[good_matches[i].queryIdx].pt);
   scene.push_back(keypoints_B[good_matches[i].trainIdx].pt);
}

如果这个问题已经有人回答过，我深表歉意；非常感谢任何能够提供帮助的人。

Answer 1

我在这里展示了如何执行此操作：c-openmp-parallel-for-loop-alternatives-to-stdvector

为每个线程创建一个私有的 std::vector，然后在临界区（critical section）中将其内容合并到共享的 std::vector 中，如下所示：

// Parallel filter: each thread gathers its qualifying matches in a vector of
// its own, then appends that vector to the shared result while holding the
// critical section. Threads enter the critical section in no particular
// order, so good_matches is not guaranteed to follow the order of matches_RM.
std::vector<DMatch> good_matches;
#pragma omp parallel
{
    // Declared inside the parallel region, so every thread has its own copy.
    std::vector<DMatch> local_matches;

    // nowait: a thread that has finished its iterations moves on to the merge
    // instead of waiting at the barrier that normally ends the loop.
    #pragma omp for nowait
    for (int row = 0; row < descriptors_A.rows; ++row) {
        const auto& candidate = matches_RM[row];
        if (candidate.distance < 3 * min_dist) {
            local_matches.push_back(candidate);
        }
    }

    // Only one thread at a time may append to the shared vector.
    #pragma omp critical
    {
        good_matches.insert(good_matches.end(),
                            local_matches.begin(),
                            local_matches.end());
    }
}

Answer 2

一种可能性是为每个线程使用私有向量并最终将它们组合在一起：

#include<omp.h>

#include<algorithm>
#include<iterator>
#include<iostream>
#include<vector>

using namespace std;

/// Demonstrates a parallel "filter into a vector" with OpenMP.
///
/// Every thread appends the odd numbers from its share of [0, 100) to a
/// buffer of its own; afterwards a single thread concatenates the buffers,
/// in thread-id order, into global_vector, which is then printed one value
/// per line.
///
/// @return 0 on completion.
int main()
{
  std::vector<int> global_vector;
  std::vector<std::vector<int>> buffers;

  #pragma omp parallel
  {
    const auto nthreads = omp_get_num_threads();
    const auto id = omp_get_thread_num();

    // Size the buffer list exactly once. The implicit barrier at the end of
    // `single` guarantees the resize is complete before any thread uses
    // its buffer.
    #pragma omp single
    {
      buffers.resize(nthreads);
    }

    // Each thread works on its chunk. schedule(static) hands out contiguous
    // chunks in thread order, so concatenating the buffers by thread id
    // reproduces the sequential order. The index is a signed int: it is what
    // the buffers store, and OpenMP 2.0 compilers require a signed index.
    #pragma omp for schedule(static)
    for (int ii = 0; ii < 100; ++ii) {
      if (ii % 2 != 0) { // Any other condition will do
        buffers[id].push_back(ii);
      }
    }

    // Combine the buffers. The worksharing loop above ends with an implicit
    // barrier, so all buffers are complete when this runs.
    #pragma omp single
    {
      for (const auto& buffer : buffers) {
        global_vector.insert(global_vector.end(), buffer.begin(), buffer.end());
      }
    }
  }

  // Print the result; '\n' avoids the per-line flush that std::endl forces.
  for (const auto& x : global_vector) {
    std::cout << x << '\n';
  }
  return 0;
}

实际加速仅取决于每个循环内完成的工作量。

Answer 3

TBB concurrent_vector的行为与std::vector非常相似，但允许并行调用push_back。

使用 OpenMP 并行化 for 循环并替换 push_back

3 个答案: