有人可以告诉我，为什么基于 OpenMP 的 Jacobi 迭代比顺序版本慢得多吗？
// Number of full sweeps performed by the outer loops below.
ITERATION = 100;
// Row stride of the flat src/dest buffers; the loops below visit rows and
// columns 1 .. DIM-2 only, so the outermost ring of cells is never written.
DIM = 4000;
// sequential version takes only ~5 seconds
for(size_t i=0; i<ITERATION; ++i) {
for(size_t y=1; y<(DIM-1); ++y) {
for(int x=(y*DIM)+1; x<(y*DIM) + DIM-1; ++x) {
dest[x]=(src[x-DIM]+src[x+DIM]+src[x]+src[x-1]+src[x+1])*0.2;
}
}
std::swap(dest, src);
}
// openmp version takes 7s on 4 cores
for(size_t i=0; i<ITERATION; ++i) {
#pragma omp parallel for
for(size_t y=1; y<(DIM-1); ++y) {
for(int x=(y*DIM)+1; x<(y*DIM) + DIM-1; ++x) {
dest[x]=(src[x-DIM]+src[x+DIM]+src[x]+src[x-1]+src[x+1])*0.2;
}
}
std::swap(dest, src);
}
我使用四个线程，并使用 -O2 标志编译程序。