Question

当我运行此代码时，我得到2542199.979500作为答案。但是，正确的答案是1271099.989750。有人可以告诉我错误在哪里吗？

这是包含错误的代码：

#include <omp.h>
#define N 1000

// Program from the question: meant to fill a[] and b[] in parallel and print
// the sum of a[i] + b[i]. As posted it prints a multiple of the expected
// total; the comments below mark the lines that cause this.
main ()
{
    int i, nthreads;
    int chunk = 10;
    float a[N], b[N], c[N], d[N];
    // NOTE: result has no initializer, yet the second loop reads it before
    // anything is assigned to it.
    double result;
    #pragma omp parallel 
    {
        // Runs on every thread of the region: each one stores to the shared
        // nthreads and prints its own line.
        nthreads = omp_get_num_threads();
        printf("no of threads %d", nthreads);     
        // NOTE: the directive reads "#pragma for", not "#pragma omp for".
        // Without the "omp" prefix it is not an OpenMP work-sharing directive
        // (compilers typically ignore an unrecognized pragma), so the loop is
        // not divided among the threads: every thread executes all N iterations.
        #pragma for shared(a,b,c,d,result) private(i) schedule(static,chunk)
        for (i=0; i < N; i++){
            a[i] = i * 1.5;
            b[i] = i + 22.35;
        }   
        // NOTE: same missing "omp" prefix. Every thread adds all N terms to the
        // shared result with no reduction or synchronization, which is
        // consistent with the reported totals of 2x (two threads) and 3x
        // (three threads) the expected value.
        #pragma for shared(a,b,c,d,result) private(i) schedule(static,chunk)
        for(i=0; i < N; i++){
            result = result + (a[i]+b[i]);
        }
    }
    printf("value is %f", result);
}

此外，当线程数为3时，我得到了 3813299.969250

结果取决于使用的线程数。这是 OpenMP 的 bug，还是我哪里做错了？

Answer 1

我建议至少进行以下两项修改......

在声明 result 时……

// result should be initialized
double result = 0;

对于你最后一个 pragma……

// specify the "reduction"
#pragma omp parallel for reduction(+:result)

如果没有指定“归约”（reduction），则 result 的求和结果无效，因为 result 会被各个线程各自独立地修改，从而导致竞争条件。

请参阅http://en.wikipedia.org/wiki/OpenMP#Reduction

#include <stdio.h>
#include <omp.h>
#define N 1000

// Fills a[] and b[] in parallel, sums a[i] + b[i] with an OpenMP reduction,
// and prints the thread count followed by the total.
// Returns 0.
int main ()
{
    int i, nthreads;
    float a[N], b[N];
    // result must start at zero: the reduction adds the per-thread partial
    // sums to its initial value.
    double result = 0;

    // The team size has to be queried from inside a parallel region.
    // "single" lets exactly one thread store it, so the shared nthreads is
    // not written by all threads at once.
    #pragma omp parallel
    {
        #pragma omp single
        nthreads = omp_get_num_threads();
    }
    printf("no of threads %d\n", nthreads);

    // The loop index of a "parallel for" is private to each thread.
    #pragma omp parallel for
    for (i = 0; i < N; i++) {
        a[i] = i * 1.5;
        b[i] = i + 22.35;
    }

    // reduction(+:result) gives every thread a private partial sum and
    // combines them when the loop ends, removing the race on result.
    #pragma omp parallel for reduction(+:result)
    for (i = 0; i < N; i++) {
        result = result + (a[i] + b[i]);
    }

    printf("value is %f", result);

    return 0;
}

Answer 2

请参阅代码中的内联注释。

// openmp.c
#include <stdio.h>
#include <omp.h>

#define N 1000

// Fills a[] and b[] and sums a[i] + b[i] inside one OpenMP parallel region,
// printing the thread count and the total once each.
// main should return an int
int main(){
    float a[N], b[N];
    // give result an initial value !
    double result = 0;

#pragma omp parallel
{
    // query and print the team size ONCE; keeping the value local to the
    // single block means no shared variable is written by several threads
#pragma omp single
    {
        const int nthreads = omp_get_num_threads();
        printf("no. of threads %d\n", nthreads);
    }

    // loop indices declared in the for statement are private to each thread
#pragma omp for
    for (int i = 0; i < N; i++) {
        a[i] = i *1.5;
        b[i] = i + 22.35;
    }

#pragma omp for
    for (int i = 0; i < N; i++) {
        double sum = a[i] + b[i];
// atomic operation needed ! result is shared, so the update must not be
// interleaved with another thread's update
#pragma omp atomic
        result += sum;
    }

    // the loop above ends with an implicit barrier, so every contribution
    // has been added before one thread prints the total
#pragma omp single
    printf("result = %f\n", result);
}
    return 0;
}

使用cc -fopenmp -std=gnu99 openmp.c进行编译，输出为：

no. of threads 4
result = 1271099.989750

Answer 3

在 OpenMP 中，应尽量减少并行区域的数量；在本例中只用一个并行区域就可以完成，因此一个就足够了。下面是一个简单的 C++ 版本。

#include <iostream>
#include <iomanip>
#include <omp.h>

const int N=1000;

// Fills a[] and b[] and sums a[i] + b[i] inside a single OpenMP parallel
// region, then prints the computed sum next to the closed-form value.
// Returns 0.
int main ()
{
  const double A = 22.35;
  const double B = 1.5;

  double a[N], b[N];
  double result=0;

#pragma omp parallel
  { // begin parallel region
    // only the master thread reports the team size
#pragma omp master
    std::cout << "no of threads: " << omp_get_num_threads() << std::endl;

    // this loop and the following could be merged and the arrays avoided.
#pragma omp for
    for(int i=0; i<N; ++i) {
      a[i] = i * B;
      b[i] = i + A;
    }
    // each thread accumulates a private partial sum; the partial sums are
    // combined into result when the loop ends
#pragma omp for reduction(+:result)
    for(int i=0; i<N; ++i)
      result += a[i]+b[i];
  } // end parallel region

  // closed form of sum_{i=0}^{N-1} (i*B + i + A) = N*A + (B+1)*N*(N-1)/2
  const double answer = N*(A+0.5*(B+1)*(N-1));

  // setprecision is sticky, so setting it once covers both values
  std::cout << std::setprecision(16)
            << "computed result = " << result
            << '\n'
            << "correct answer  = " << answer
            << std::endl;

  return 0;
}

我得到（在Mac OS X 10.6.8上使用gcc 4.6.2）：

no of threads: 2
computed result = 1271099.999999993
correct answer  = 1271100

没有得到正确的总和 - openmp

3 个答案: