“private variable cannot be reduction”(私有变量不能用于归约),尽管该变量是在 SIMD 块之外定义的

时间:2016-12-07 09:10:38

标签: c++ openmp llvm-clang

我有一个使用 OpenMP 的 C++ 项目,我尝试在 Blue Gene/Q 上用 LLVM(Clang)编译它。其中有一个函数,精简之后如下所示:

/// Element-wise difference of two arrays with a squared-norm accumulation.
///
/// This listing is an excerpt: every `// […]` marker stands for code that was
/// omitted. The visible loop stores x[i] - y[i] into res[i] and adds the square
/// of that value (converted to double) to a running sum `lnorm`.
///
/// @tparam FT          element type of the three arrays
/// @tparam veclen      value passed as the alignment argument of the `aligned` clause
/// @param res          output array; res[i] is written for i in [low, hi)
/// @param x            first input array
/// @param y            second input array
/// @param n2res        not used in the visible part of the body
/// @param n            not used in the visible part of the body
/// @param n_cores      not used in the visible part of the body
/// @param n_simt       not used in the visible part of the body
/// @param n_blas_simt  bound that `smtid` is compared against
///
/// NOTE(review): `smtid`, `low`, `hi` and `norm_array` are not declared in the
/// visible code; presumably they come from the omitted parts — confirm.
template <typename FT, int veclen>
inline void xmyNorm2Spinor(FT *res,
                           FT *x,
                           FT *y,
                           double &n2res,
                           int n,
                           int n_cores,
                           int n_simt,
                           int n_blas_simt) {
    // Zero-initialised local double declared with QPHIX_LLC_CACHE_ALIGN
    // alignment; the two branches differ only in the compiler-specific syntax.
#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
    double norm2res __attribute__((aligned(QPHIX_LLC_CACHE_ALIGN))) = 0;
#else
    __declspec(align(QPHIX_LLC_CACHE_ALIGN)) double norm2res = 0;
#endif

#pragma omp parallel shared(norm_array)
    {
        // […]
        if (smtid < n_blas_simt) {
            // […]

            // Declared inside the parallel region; the compiler diagnostic
            // reports this variable as "predetermined as private".
            double lnorm = 0;

//#pragma prefetch x,y,res
//#pragma vector aligned(x,y,res)
            // This is the directive the compiler rejects with
            // "private variable cannot be reduction".
#pragma omp simd aligned(res, x, y : veclen) reduction(+ : lnorm)
            for (int i = low; i < hi; i++) {
                res[i] = x[i] - y[i];
                // Square in double precision regardless of FT.
                double tmpd = (double)res[i];
                lnorm += (tmpd * tmpd);
            }
            // […]
        }
    }
    // […]
}

错误就在这里:

In file included from /homec/hbn28/hbn28e/Sources/qphix/tests/timeDslashNoQDP.cc:6:
In file included from /homec/hbn28/hbn28e/Sources/qphix/include/qphix/blas.h:8:
/homec/hbn28/hbn28e/Sources/qphix/include/qphix/blas_c.h:156:54: error: private variable cannot be reduction
#pragma omp simd aligned(res,x,y:veclen) reduction(+:lnorm)
                                                     ^
/homec/hbn28/hbn28e/Sources/qphix/include/qphix/blas_c.h:151:12: note: predetermined as private
                                double lnorm=0;
                                       ^

由于外层的 omp parallel 块,变量 lnorm 是为每个线程单独定义的。随后还有一个额外的 SIMD 区段,每个线程在其中使用 SIMD 通道。归约(reduction)应当在线程内部完成,因此该变量的作用域看起来是正确的。但编译器仍然不接受这种写法。

这里有什么问题?

1 个答案:

答案 0 :(得分:1)

问题似乎在于:omp parallel 块赋予变量 lnorm 的私有(private)属性,与 OpenMP reduction() 子句对其参数变量所施加的要求相冲突(尽管对于 reduction() 子句所作用的嵌套 omp simd 块而言,lnorm 并不是私有的)。

您可以尝试把计算 lnorm 的代码提取到一个单独的函数中来解决该问题:

/// Writes the element-wise difference of two arrays and returns its squared norm.
///
/// For every index i in [low, hi) this stores x[i] - y[i] into res[i] and adds
/// the square of the stored value, converted to double, to the returned sum.
///
/// @tparam FT      element type of the arrays
/// @tparam veclen  value passed as the alignment argument of the `aligned` clause
/// @param res      output array, written for indices in [low, hi)
/// @param x        first input array
/// @param y        second input array
/// @param low      first index processed (inclusive)
/// @param hi       end of the index range (exclusive)
/// @return         sum over [low, hi) of res[i] * res[i], accumulated in double;
///                 0 when the range is empty
template <typename FT, int veclen>
inline double compute_res_and_lnorm(FT *res,
                           FT *x,
                           FT *y,
                           int low,
                           int hi)
{
    // The accumulator is local to this function, so the simd reduction below
    // operates on a variable that is not declared inside any enclosing
    // parallel region.
    double sum_of_squares = 0;

#pragma omp simd aligned(res, x, y : veclen) reduction(+ : sum_of_squares)
    for (int idx = low; idx < hi; ++idx) {
        res[idx] = x[idx] - y[idx];
        // Read the stored value back so the square is taken of exactly what
        // was written to res, then promote to double.
        const double diff = static_cast<double>(res[idx]);
        sum_of_squares += diff * diff;
    }
    return sum_of_squares;
}

template <typename FT, int veclen>
inline void xmyNorm2Spinor(FT *res,
                           FT *x,
                           FT *y,
                           double &n2res,
                           int n,
                           int n_cores,
                           int n_simt,
                           int n_blas_simt) {
#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
    double norm2res __attribute__((aligned(QPHIX_LLC_CACHE_ALIGN))) = 0;
#else
    __declspec(align(QPHIX_LLC_CACHE_ALIGN)) double norm2res = 0;
#endif

#pragma omp parallel shared(norm_array)
    {
        // […]
        if (smtid < n_blas_simt) {
            // […]
            double lnorm = compute_res_and_lnorm(res, x, y, low, hi);
            // […]
        }
    }
    // […]
}