VC ++,测试嵌套循环的OpenMP并行性,但它给出了奇怪的结果:
结果如下:
Sum=450000000000.000000 Serial time: 117376.852855
Sum=228067994200.000000 Parallel time: 117391.867931
以下是工作代码:
#include "stdafx.h"
#include <omp.h>
#include <ppl.h>
double Serial(), Parallel();
int _tmain(int argc, _TCHAR* argv[])
{
printf("Sum=%f Serial time: %lf\n",Serial(),omp_get_wtime() );
printf("Sum=%f Parallel time: %lf\n",Parallel(),omp_get_wtime() );
return 0;
}
double Serial()
{
double sum=0.;
int n=10;
for (int i01=0; i01<n; i01++){
for (int i02=0; i02<n; i02++){
for (int i03=0; i03<n; i03++){
for (int i04=0; i04<n; i04++){
for (int i05=0; i05<n; i05++){
for (int i06=0; i06<n; i06++){
for (int i07=0; i07<n; i07++){
for (int i08=0; i08<n; i08++){
for (int i09=0; i09<n; i09++){
for (int i10=0; i10<n; i10++){
sum+=i01+i02+i03+i04+i05+i06+i07+i08+i09+i10;
}}}}} }}}}}
return sum;
}
double Parallel()
{
double sum=0.;
#pragma omp parallel for shared(sum)
for (int i01=0; i01<10; i01++){
for (int i02=0; i02<10; i02++){
for (int i03=0; i03<10; i03++){
for (int i04=0; i04<10; i04++){
for (int i05=0; i05<10; i05++){
for (int i06=0; i06<10; i06++){
for (int i07=0; i07<10; i07++){
for (int i08=0; i08<10; i08++){
for (int i09=0; i09<10; i09++){
for (int i10=0; i10<10; i10++){
sum+=i01+i02+i03+i04+i05+i06+i07+i08+i09+i10;
}}}}} }}}}}
return sum;
}
答案 0 :(得分:0)
您已经正确地猜到应该共享sum
,但实际上它是一个简化变量,必须放在reduction
子句中:
#pragma omp parallel for reduction(+:sum)
另一个选项(较慢)将使增量原子。在这种情况下,原子操作可能是一种过度杀伤,但是当简单的缩减不适用时,它们可以帮助处理更复杂的情况,例如在减少数组时:
#pragma omp atomic update
sum+=i01+i02+i03+i04+i05+i06+i07+i08+i09+i10;
update
子句来自OpenMP 3.1。 Visual Studio只支持2.0,所以它不会理解它,但你永远不会知道 - 你可能希望有一天能够转向更好的编译器。