当我运行这段代码时,得到的结果是 2542199.979500,但正确答案应该是 1271099.989750。有人能告诉我错误出在哪里吗?
这是包含错误的代码:
#include <omp.h>
#define N 1000
// Fills a[] and b[], accumulates a[i] + b[i] into `result` inside a single
// OpenMP parallel region, and prints the thread count and the total.
// This is the question's faulty listing, kept verbatim; the NOTE(review)
// comments below mark the problems visible in it.
// NOTE(review): `main` has no return type, and printf is used although the
// listing only includes <omp.h>, not <stdio.h>.
main ()
{
int i, nthreads;
int chunk = 10;
// c and d are declared but never read or written in this listing.
float a[N], b[N], c[N], d[N];
// NOTE(review): `result` is never given an initial value before it is
// accumulated into in the second loop.
double result;
#pragma omp parallel
{
// These two statements are inside the parallel region but not inside any
// worksharing or single construct, so every thread of the team stores the
// team size into the shared `nthreads` and prints it.
nthreads = omp_get_num_threads();
printf("no of threads %d", nthreads);
// NOTE(review): the directive below is spelled `#pragma for`, without `omp`,
// so it is not an OpenMP worksharing-loop directive. Presumably the compiler
// ignores it as an unknown pragma, in which case the loop is not divided
// among the threads (each thread runs all N iterations) and the private(i)
// clause has no effect, leaving `i` shared -- confirm with -Wunknown-pragmas.
#pragma for shared(a,b,c,d,result) private(i) schedule(static,chunk)
for (i=0; i < N; i++){
a[i] = i * 1.5;
b[i] = i + 22.35;
}
// NOTE(review): same missing `omp` here. If every thread runs the whole loop
// and adds into the shared `result` without a reduction or atomic, the total
// scales with the thread count, which would be consistent with the reported
// totals being 2x and 3x the expected value for 2 and 3 threads.
#pragma for shared(a,b,c,d,result) private(i) schedule(static,chunk)
for(i=0; i < N; i++){
result = result + (a[i]+b[i]);
}
}
// Printed once, after the parallel region has ended.
printf("value is %f", result);
}
此外,当线程数为 3 时,我得到的结果是 3813299.969250。结果取决于所使用的线程数。这是 OpenMP 本身的 bug,还是我哪里做错了?
答案 0 :(得分:1)
我建议至少进行以下两项修改......
对于 result 的声明:
...
// result should be initialized
double result = 0;
对于你的最后一个 pragma:
// specify the "reduction"
#pragma omp parallel for reduction(+:result)
如果没有指定“归约”(reduction),result 的累加结果就是无效的,因为各个线程会各自独立地修改 result,从而导致竞争条件。
请参阅http://en.wikipedia.org/wiki/OpenMP#Reduction
#include <stdio.h>
#include <omp.h>
#define N 1000
// Fills a[] and b[] in parallel, sums a[i] + b[i] with an OpenMP reduction,
// and prints the number of threads followed by the total.
// Returns 0 on completion.
int main ()
{
    int i, nthreads = 0;
    float a[N], b[N];
    double result = 0;

    // omp_get_num_threads() only reports the team size when called inside a
    // parallel region. Let exactly one thread store it: having every thread
    // write the shared variable at once is a data race, even though they
    // would all write the same value.
    #pragma omp parallel
    {
        #pragma omp single
        nthreads = omp_get_num_threads();
    }
    printf("no of threads %d\n", nthreads);

    // The iteration variable of a `parallel for` loop is made private to each
    // thread automatically, so `i` needs no explicit private clause.
    #pragma omp parallel for
    for (i = 0; i < N; i++) {
        a[i] = i * 1.5;
        b[i] = i + 22.35;
    }

    // reduction(+:result) gives each thread its own partial sum and adds the
    // partial sums into `result` when the loop ends, avoiding a race on it.
    #pragma omp parallel for reduction(+:result)
    for (i = 0; i < N; i++) {
        result = result + (a[i] + b[i]);
    }

    printf("value is %f", result);
    return 0;
}
答案 1 :(得分:1)
请参阅代码中的内联注释。
// openmp.c
#include <stdio.h>
#include <omp.h>
#define N 1000
// Fills a[] and b[] and sums a[i] + b[i] inside one OpenMP parallel region,
// protecting the shared accumulator with an atomic update, then prints the
// thread count and the total. Returns 0 on completion.
// main should return an int
int main(){
    float a[N], b[N];
    // give result an initial value !
    double result = 0;

    #pragma omp parallel
    {
        // just print numthreads ONCE: the query and the print both live in
        // the single construct, so only one thread executes them and no
        // shared variable is written by the whole team.
        #pragma omp single
        {
            int nthreads = omp_get_num_threads();
            printf("no. of threads %d\n", nthreads);
        }

        // Worksharing loop: the iterations are divided among the threads.
        #pragma omp for
        for (int i = 0; i < N; i++) {
            a[i] = i *1.5;
            b[i] = i + 22.35;
        }

        #pragma omp for
        for (int i = 0; i < N; i++) {
            double sum = a[i] + b[i];
            // atomic operation needed ! `result` is shared by all threads,
            // so the read-modify-write must not be interleaved.
            #pragma omp atomic
            result += sum;
        }

        // The loop above ends with an implicit barrier, so every thread's
        // contribution has been added before one thread prints the total.
        #pragma omp single
        printf("result = %f\n", result);
    }
    return 0;
}
使用 cc -fopenmp -std=gnu99 openmp.c 进行编译,输出为:
no. of threads 4
result = 1271099.989750
答案 2 :(得分:0)
在 OpenMP 中,应尽量减少并行区域的数量;在本例中只用一个并行区域就可以完成,因此一个就足够了。下面是一个简单的 C++ 版本。
#include <iostream>
#include <iomanip>
#include <omp.h>
const int N=1000;
// Fills a[] and b[] and sums a[i] + b[i] inside a single OpenMP parallel
// region using a reduction, then prints the computed sum next to the
// closed-form value for comparison. Returns 0 on completion.
int main ()
{
  const double A = 22.35;
  const double B = 1.5;
  double a[N], b[N];
  double result = 0;

  #pragma omp parallel
  { // begin parallel region
    // Only the master thread reports the team size, so it is printed once.
    #pragma omp master
    std::cout << "no of threads: " << omp_get_num_threads() << std::endl;

    // this loop and the following could be merged and the arrays avoided.
    #pragma omp for
    for (int i = 0; i < N; ++i) {
      a[i] = i * B;
      b[i] = i + A;
    }

    // Each thread accumulates a private partial sum; the partial sums are
    // added into `result` when the loop finishes.
    #pragma omp for reduction(+:result)
    for (int i = 0; i < N; ++i)
      result += a[i] + b[i];
  } // end parallel region

  // Closed form of sum_{i=0}^{N-1} (i*B + i + A) = N*A + (B+1)*N*(N-1)/2.
  const double answer = N*(A+0.5*(B+1)*(N-1));

  // setprecision is sticky, so setting it once covers both values.
  std::cout << "computed result = " << std::setprecision(16) << result
            << '\n'
            << "correct answer = " << answer
            << std::endl;
  return 0;
}
我得到(在Mac OS X 10.6.8上使用gcc 4.6.2):
no of threads: 2
computed result = 1271099.999999993
correct answer = 1271100