Question

我有下面这段代码。我尝试过用OpenMP和__gnu_parallel::for_each使其并行化，但这两种方法都没有达到预期效果。

我该怎么办？这里make是一个集合的向量，集合中包含的类型是OctCell*。

算法提供正确的输出，但不会加速代码。我有4个核心。

/// Invokes `Function` on every element stored in `make.at(level)`.
///
/// @param Function  Callback applied to each cell pointer in the level.
/// @param level     Index passed to `make.at()` to select the container.
///
/// NOTE(review): if the per-level container is a std::set, its iterators are
/// not random-access; libstdc++ parallel mode is documented to parallelise
/// only random-access ranges, so this call may be running sequentially —
/// confirm against the container type.
void Oct::applyFunction3(void (*Function)(OctCell* cell), unsigned int level)
{
    // Look the level up once and reuse the same container for both iterators.
    auto& cellsAtLevel = make.at(level);
    __gnu_parallel::for_each(cellsAtLevel.begin(), cellsAtLevel.end(), Function);
}

函数如下：

/// Runs the X-direction sweep for one cell: fetches its RIGHT and LEFT
/// neighbours and passes them on to addFluxToConserveds.
///
/// @param cell  Cell to sweep; dereferenced unconditionally.
void directionalSweepX(OctCell* cell) {
    // RIGHT is used as the positive-side neighbour, LEFT as the negative-side one.
    OctCell* const rightNeighbour = cell->getNeighbour(RIGHT);
    OctCell* const leftNeighbour = cell->getNeighbour(LEFT);

    addFluxToConserveds(cell, rightNeighbour, leftNeighbour, X);
}

addFluxToConserveds执行以下操作：

/// Applies the fluxes through the two faces of `cell` along `direction` to the
/// conserved variables of `cell` and of its neighbours.
///
/// @param cell          Cell being updated; dereferenced unconditionally.
/// @param positiveCell  Neighbour on the positive side, or null if there is none.
/// @param negativeCell  Neighbour on the negative side, or null if there is none.
/// @param direction     Sweep axis; `direction + 1` selects the flux slot that
///                      receives the cell pressure when a neighbour is missing.
///
/// @note This writes to `mConserveds_n` of both neighbours as well as of
///       `cell`, so concurrent calls on adjacent cells touch the same data
///       unless the caller synchronises or partitions the cells.
void addFluxToConserveds(OctCell* cell, OctCell* positiveCell, OctCell* negativeCell, SWEEP_DIRECTION direction) {
    const int kNumFluxComponents = 5;

    // 1 << lMin is the integer power 2^lMin.
    const double alpha = (1 << static_cast<int>(cell->getParentOct()->lMin)) * gDeltaT / gL;
    const double beta = alpha / 8.0;

    // Weight applied to the positive neighbour. It starts at zero so that a
    // non-leaf neighbour (for which no flux is computed below) is updated with
    // a well-defined value instead of an indeterminate one.
    double gamma = 0.0;

    // ---- Positive face ----
    double positiveFlux[kNumFluxComponents] = {0.0, 0.0, 0.0, 0.0, 0.0};
    if (positiveCell == nullptr) {
        // No neighbour: only the pressure term enters, in slot direction + 1.
        positiveFlux[direction + 1] = getPressure(cell);
    } else if (positiveCell->isLeaf()) {
        computeFlux(cell, positiveCell, direction, positiveFlux);
        // Same-level neighbours get the full weight, others the reduced one.
        gamma = (positiveCell->getLevel() == cell->getLevel()) ? alpha : beta;
    }

    for (int i = 0; i < kNumFluxComponents; ++i) {
        cell->mConserveds_n[i] -= alpha * positiveFlux[i];
        if (positiveCell != nullptr) {
            positiveCell->mConserveds_n[i] += gamma * positiveFlux[i];
        }
    }

    // ---- Negative face ----
    double negativeFlux[kNumFluxComponents] = {0.0, 0.0, 0.0, 0.0, 0.0};
    if (negativeCell == nullptr) {
        negativeFlux[direction + 1] = getPressure(cell);
    } else if (negativeCell->isLeaf() && negativeCell->getLevel() == cell->getLevel() - 1) {
        // Flux is only computed here when the neighbour is a leaf exactly one
        // level below this cell.
        computeFlux(negativeCell, cell, direction, negativeFlux);
    }

    for (int i = 0; i < kNumFluxComponents; ++i) {
        cell->mConserveds_n[i] += alpha * negativeFlux[i];
        if (negativeCell != nullptr) {
            negativeCell->mConserveds_n[i] -= beta * negativeFlux[i];
        }
    }
}

Answer 1

使用#include <omp.h>。

在函数addFluxToConserveds中，您可以为两个for循环添加#pragma omp for，因为每次迭代都不依赖于其他迭代的结果。由于第二个for循环依赖于它之前的那段顺序代码，因此这里无法使用sections或tasks。

applyFunction3的串行实现是什么样的？

关于OpenMP，你必须记住一件关键的事情：在某一架构上编译的程序不会针对其他所有架构进行优化，即使是同一系列的处理器（英特尔Core Duo与英特尔双核、英特尔与AMD等）。这意味着它在编译时所用的原始架构上运行得很快，而在其他架构上运行速度可能会较慢。

使用OpenMP或__gnu_parallel进行并行计算不会加快代码速度

1 个答案: