我正在尝试并行化以下循环:
#pragma omp parallel for private(j,i,mxy) firstprivate(in,out,p)
for(int j = 0; j < Ny; j++) {
// #pragma omp parallel for private(i,mxy) firstprivate(in,my,j)
for(int i = 0; i < Nx; i++){
mxy = i + j*Nx;
in[i+1] = b_2D[mxy] + I*0.0 ;
}
fftw_execute(p);
for(int i = 0; i < Nx; i++){
mxy = i + j*Nx;
b_2D[mxy] = cimag(out[i+1]) ;
}
}
我的速度很快,但无论我设置为private和firstprivate的变量如何,我都会得到不同的结果。我相信这是正确的,但是为什么我得到的结果与我在系列中运行时的结果不同?
我尝试了以下内容:
fftw_make_planner_thread_safe();
fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
#pragma omp parallel private(j,i,mxy) firstprivate(in,out)
{
fftw_plan p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
for( j = 0; j < N; j++)
in[j] = 0.0;
#pragma omp for
for( j = 0; j < Ny; j++) {
for( i = 0; i < Nx; i++)
in[i+1] = b_2D[i + j*Nx] + I*0.0;
fftw_execute(p);
for( i = 0; i < Nx; i++)
b_2D[i + j*Nx] = cimag(out[i+1]) ;
}
fftw_destroy_plan(p);
}
fftw_free(in);
fftw_free(out);
这给我错误:&#34;分段错误:11&#34;
如果我这样做:
fftw_make_planner_thread_safe();
#pragma omp parallel private(j,i,mxy)
{
fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
fftw_plan p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
for( j = 0; j < N; j++)
in[j] = 0.0;
#pragma omp for
for( j = 0; j < Ny; j++) {
for( i = 0; i < Nx; i++)
in[i+1] = b_2D[i + j*Nx] + I*0.0;
fftw_execute(p);
for( i = 0; i < Nx; i++)
b_2D[i + j*Nx] = cimag(out[i+1]) ;
}
fftw_destroy_plan(p);
fftw_free(in);
fftw_free(out);
}
我再次收到此错误:&#34;分段错误:11&#34; 但我又跑了,它说:
solver(9674,0x7fff74e22000) malloc: *** error for object 0x7f8d70f00410: double free
*** set a breakpoint in malloc_error_break to debug
Abort trap: 6
答案 0 :(得分:1)
您在所有线程中使用相同的计划p
调用FFTW。由于计划包含输入和输出缓冲区的位置(提供给fftw_plan_dft_whatever
计划构造函数的缓冲区),因此对fftw_execute
的所有并发调用将使用相同的缓冲区而不是私有副本。解决方案是为每个线程构建一个单独的计划:
#pragma omp parallel private(j,i,mxy) firstprivate(in,out)
{
// The following OpenMP construct enforces thread-safety
// Remove if the plan constructor is thread-safe
#pragma omp critical (plan_ops)
fftw_plan my_p = fftw_plan_dft_whatever(..., in, out, ...);
// my_p now refers the private in and out arrays
#pragma omp for
for(int j = 0; j < Ny; j++) {
for(int i = 0; i < Nx; i++){
mxy = i + j*Nx;
in[i+1] = b_2D[mxy] + I*0.0 ;
}
fftw_execute(my_p);
for(int i = 0; i < Nx; i++){
mxy = i + j*Nx;
b_2D[mxy] = cimag(out[i+1]) ;
}
}
// See comment above for the constructor operation
#pragma omp critical (plan_ops)
fftw_destroy_plan(my_p);
}
答案 1 :(得分:0)