Question

我是OpenMP的新手，我正在尝试并行化以下for循环，其中包括2个内部for循环。使用，第二个内部循环取决于第一个内部循环的结果。

通过阅读following answer，我并行化了外部循环。

现在，我想并行化 2个内部循环。为了解决这个问题，我想到了嵌套它们。

我担心的是会出现的比赛条件。我已经读过private vars的声明。这是避免此问题的方法吗？

我担心的另一点是，我已经在外部for循环之外初始化了大多数变量。在针对this thread的问题发表评论之后，我认为应该指出这一点。

#include <iostream>
#include <cstring>
#include <vector>
#include <string>
#include <random>
#include <algorithm>
#include <omp.h>

int main(int argc, const char *argv[]) {

  const int N = 1000000;
  const double T = 1;
  const int r = 0;
  const int K = 100;
  const double s = 0.5;
  const int S0 = 100;
  const int m = 1000;
  const double k=0.9;

  const double D = T / m;

  std::vector<double> A;
  std::vector<double> kappa(21, 0);
  std::vector<double> U(21, 0);

  const double distMean = (r -  pow (s, 2.0) / 2) * D;
  const double distStdDev = s * pow(D, 0.5);
  const int distSize = 1000;

  #pragma omp parallel for num_threads(8)
  for (int i = N; i > 0; --i) {

    std::random_device randDev; //print this one separately
    std::mt19937 generator(randDev());
    std::normal_distribution<double> dist(distMean, distStdDev);

    std::vector<double> X;
    X.reserve(distSize + 1);

    #pragma omp parallel for //num_threads(8)
    for (int m = distSize; m > 0; --m) {
      X.push_back(dist(generator)); 
    }

    std::vector<double> XCumSum(1000);
    std::partial_sum(X.begin(), X.end(), XCumSum.begin(), std::plus<double>());

    std::vector<double> expCumSum;
    expCumSum.reserve(distSize + 1);

    for (int m = distSize; m > 0; --m) {
      expCumSum.push_back( exp( XCumSum[m] ) );  
    }

    double maxPart2 = k * S0 * ( std::accumulate(expCumSum.begin(), expCumSum.end(), 0.0) / expCumSum.size() ) ;
    double maxPart1 = S0 * exp(std::accumulate(X.begin(), X.end(), 0.0));
    double result = exp(-r*T) * std::max( (maxPart1 - maxPart2), 0.0 );

    A.reserve(N + 1);
    A.push_back(result);

  }
  float ASumMean = ( std::accumulate(A.begin(), A.end(), 0.0) ) / A.size();
  std::cout << ASumMean << std::endl;
  return ASumMean;
}

我用

进行编译

g++-8.2 -O3 -o tests tests.cpp

没有fopenmp，我的结果是：

16.6318

real    1m27.113s
user    1m14.483s
 sys    0m11.217s

使用fopenmp和g++-8.2 -O3 -o tests tests.cpp -fopenmp，结果为：

16.6555

real    0m39.474s
user    1m42.468s
 sys    0m30.546s

这些是预期的结果，但是时间和加速使我怀疑循环的实际编码。

PS：对C ++来说还很新。我们总是欢迎您提供有关如何改善我的风格的任何建议或最佳实践建议。

OPENMP C ++嵌套用于循环和进一步优化

0 个答案: