我是 OpenMP 的初学者,想把三个相互独立的代码块简单地并行执行。我使用了 `single nowait` 结构。下面所示的变量彼此独立。类型 `SpDoubleVec` 以及函数 `transpose` 和 `prune` 来自 Eigen 库。这段代码可以运行,但效率不高。有什么建议可以加快速度吗?
// Evaluates three independent sparse expressions, reduces each one to a scalar
// (t1, t2, t3) against g1/g2/g3, and sums the three scalars into t. The three
// computations share no intermediate data, so each runs in its own OpenMP
// section.
//
// NOTE(review): SpDoubleVec is presumably Eigen::SparseVector<double> — confirm.
// NOTE(review): the vectors and lambda values are only declared here; they
// must be filled in before the parallel region reads them.
// NOTE(review): if the vectors are small, the cost of starting the parallel
// region may outweigh the work done inside it — measure before relying on it.
SpDoubleVec x1, x2, x3, x4, x5, x6, b1, b2, b3, g1, g2, g3;
double lambda1, lambda2, lambda3;
double t1, t2, t3;
omp_set_num_threads(3);
// `sections` expresses "run each of these blocks exactly once, on whichever
// thread is free" directly, instead of a chain of `single nowait` constructs.
#pragma omp parallel sections
{
#pragma omp section
    {
        SpDoubleVec v1;
        v1 = lambda1 * x1 + (1 - lambda1) * x2 - b1;
        // For real-valued vectors dot() gives the same scalar as
        // g1.transpose() * v1 without building a 1x1 product expression.
        t1 = g1.dot(v1);
    }
#pragma omp section
    {
        SpDoubleVec v2;
        v2 = lambda2 * x3 + (1 - lambda2) * x4 - b2;
        // Clamp negative stored coefficients to zero.
        for (SpDoubleVec::InnerIterator it(v2); it; ++it) {
            if (it.value() < 0) {
                it.valueRef() = 0;
            }
        }
        // Drop the entries that were just zeroed from the sparse storage
        // (per Eigen's prune semantics — confirm for the Eigen version in use).
        v2.prune(0.0);
        t2 = g2.dot(v2);
    }
#pragma omp section
    {
        SpDoubleVec v3;
        v3 = b3 - lambda3 * x5 - (1 - lambda3) * x6;
        // Clamp negative stored coefficients to zero.
        for (SpDoubleVec::InnerIterator it(v3); it; ++it) {
            if (it.value() < 0) {
                it.valueRef() = 0;
            }
        }
        v3.prune(0.0);
        t3 = g3.dot(v3);
    }
}
// The parallel region ends with an implicit barrier, so t1..t3 are all
// written before they are summed here.
// NOTE(review): t is assumed to be declared by the surrounding code — confirm.
t = t1 + t2 + t3;