我有一个使用 OpenMP 的 C++ 项目，正在尝试用 Blue Gene/Q 上的 LLVM 编译它。其中有一个函数，精简之后大致如下：
// Stripped-down excerpt of xmyNorm2Spinor. Lines marked `// […]` stand for
// code omitted from this listing; the definitions of smtid, low, hi and
// norm_array are not visible here.
//
// Visible behaviour: inside an OpenMP parallel region, for i in [low, hi) it
// stores res[i] = x[i] - y[i] and accumulates the squares of the stored
// values (converted to double) in lnorm.
//
// NOTE(review): n2res, n, n_cores and n_simt are not used in the visible part;
// presumably n2res receives the final squared norm — confirm against the full
// source.
template <typename FT, int veclen>
inline void xmyNorm2Spinor(FT *res,
FT *x,
FT *y,
double &n2res,
int n,
int n_cores,
int n_simt,
int n_blas_simt) {
// norm2res is a double initialised to 0 and aligned to QPHIX_LLC_CACHE_ALIGN;
// the two branches only differ in the compiler-specific alignment syntax
// (GNU-style attribute vs. __declspec). Its use lies in the omitted code.
#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
double norm2res __attribute__((aligned(QPHIX_LLC_CACHE_ALIGN))) = 0;
#else
__declspec(align(QPHIX_LLC_CACHE_ALIGN)) double norm2res = 0;
#endif
#pragma omp parallel shared(norm_array)
{
// […]
// Only threads whose smtid is below n_blas_simt execute the loop below.
if (smtid < n_blas_simt) {
// […]
// Declared inside the parallel region, so every thread has its own lnorm.
double lnorm = 0;
//#pragma prefetch x,y,res
//#pragma vector aligned(x,y,res)
// This is the directive the compiler rejects with
// "private variable cannot be reduction": lnorm is predetermined private in
// the enclosing parallel region yet is named in reduction() here.
#pragma omp simd aligned(res, x, y : veclen) reduction(+ : lnorm)
for (int i = low; i < hi; i++) {
res[i] = x[i] - y[i];
// Square the value as stored in res[i] (i.e. after conversion to FT).
double tmpd = (double)res[i];
lnorm += (tmpd * tmpd);
}
// […]
}
}
// […]
}
编译时的报错信息如下：
In file included from /homec/hbn28/hbn28e/Sources/qphix/tests/timeDslashNoQDP.cc:6:
In file included from /homec/hbn28/hbn28e/Sources/qphix/include/qphix/blas.h:8:
/homec/hbn28/hbn28e/Sources/qphix/include/qphix/blas_c.h:156:54: error: private variable cannot be reduction
#pragma omp simd aligned(res,x,y:veclen) reduction(+:lnorm)
^
/homec/hbn28/hbn28e/Sources/qphix/include/qphix/blas_c.h:151:12: note: predetermined as private
double lnorm=0;
^
由于外层的 omp parallel 块，变量 lnorm 是在每个线程中各自定义的。随后还有一个 SIMD 部分，每个线程在其中使用 SIMD 通道。归约（reduction）应当在线程内部完成，因此变量的作用域看起来是正确的。然而编译器仍然不接受这种写法。
这里的问题出在哪里？
答案 0 :(得分:1)
问题似乎在于：omp parallel 块赋予变量 lnorm 的私有（private）属性，与 OpenMP 的 reduction() 子句对其参数变量提出的要求相冲突（尽管相对于 reduction() 子句所作用的嵌套 omp simd 块而言，lnorm 并不是私有的）。
您可以尝试把 lnorm 的计算代码提取到一个独立的函数中，以此绕过该问题：
/// Element-wise difference plus squared norm over a half-open index range.
///
/// For every index in [low, hi) stores x[i] - y[i] into res[i] and adds the
/// square of the stored value (converted to double) to a running sum.
///
/// The accumulator is an ordinary function-local variable, i.e. it is not
/// declared inside any OpenMP parallel construct of this function, so it can
/// be named in the simd reduction clause.
///
/// @param res  destination array; entries in [low, hi) are overwritten
/// @param x    minuend array
/// @param y    subtrahend array
/// @param low  first index processed
/// @param hi   one past the last index processed
/// @return     sum of res[i] * res[i] over [low, hi), 0 for an empty range
template <typename FT, int veclen>
inline double compute_res_and_lnorm(FT *res,
                                    FT *x,
                                    FT *y,
                                    int low,
                                    int hi)
{
  double sum_of_squares = 0;
#pragma omp simd aligned(res, x, y : veclen) reduction(+ : sum_of_squares)
  for (int idx = low; idx < hi; ++idx) {
    // Round the difference to FT first so that the squared value matches
    // exactly what ends up in res.
    const FT diff = x[idx] - y[idx];
    res[idx] = diff;
    const double widened = static_cast<double>(diff);
    sum_of_squares += widened * widened;
  }
  return sum_of_squares;
}
template <typename FT, int veclen>
inline void xmyNorm2Spinor(FT *res,
FT *x,
FT *y,
double &n2res,
int n,
int n_cores,
int n_simt,
int n_blas_simt) {
#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
double norm2res __attribute__((aligned(QPHIX_LLC_CACHE_ALIGN))) = 0;
#else
__declspec(align(QPHIX_LLC_CACHE_ALIGN)) double norm2res = 0;
#endif
#pragma omp parallel shared(norm_array)
{
// […]
if (smtid < n_blas_simt) {
// […]
double lnorm = compute_res_and_lnorm(res, x, y, low, hi);
// […]
}
}
// […]
}