我是OpenMP的新手,我正在尝试并行化这个循环:
/*
** Collision step (fragment of a larger function body).
**
** For every cell that is not an obstacle, this code:
**   1. sums the NSPEEDS densities held in tmp_cells to get the local density,
**   2. derives the x / y velocity components and the squared velocity,
**   3. evaluates the equilibrium densities d_equ[0..8],
**   4. writes the relaxed densities into cells, weighted by params.omega.
**
** Each (ii, jj) iteration reads tmp_cells and writes cells only at index
** ii*params.nx + jj, so the rows can be processed in parallel as long as
** every scratch variable is private to the executing thread.
** NOTE(review): this assumes cells and tmp_cells do not alias -- confirm.
*/
int ii,jj,kk; /* generic counters */
const double c_sq = 1.0/3.0; /* square of speed of sound */
const double w0 = 4.0/9.0; /* weighting factor */
const double w1 = 1.0/9.0; /* weighting factor */
const double w2 = 1.0/36.0; /* weighting factor */
double u_x,u_y; /* av. velocities in x and y directions */
double u[NSPEEDS]; /* directional velocities (u[0] is never written or read) */
double d_equ[NSPEEDS]; /* equilibrium densities */
double u_sq; /* squared velocity */
double local_density; /* sum of densities in a particular cell */
/* loop over the cells in the grid
** NB the collision step is called after
** the propagate step and so values of interest
** are in the scratch-space grid */
/* All per-cell scratch variables are declared outside the loop, so each one
** must appear in the private clause. Listing only the counters and d_equ
** leaves u_x, u_y, u_sq, local_density and u[] shared between threads, which
** is a data race and corrupts the results. The loop variable ii is made
** private by the construct itself. The pragma is ignored when the file is
** compiled without OpenMP support.
** NOTE(review): do not rely on the values of the privatized variables after
** the loop. */
#pragma omp parallel for \
    private(jj, kk, u_x, u_y, u_sq, local_density, u, d_equ) \
    shared(cells, tmp_cells)
for(ii=0;ii<params.ny;ii++) {
  for(jj=0;jj<params.nx;jj++) {
    /* skip obstacle cells: only fluid cells take part in the collision */
    if(!obstacles[ii*params.nx + jj]) {
      /* compute local density total */
      local_density = 0.0;
      for(kk=0;kk<NSPEEDS;kk++) {
        local_density += tmp_cells[ii*params.nx + jj].speeds[kk];
      }
      /* compute x velocity component */
      u_x = (tmp_cells[ii*params.nx + jj].speeds[1] +
             tmp_cells[ii*params.nx + jj].speeds[5] +
             tmp_cells[ii*params.nx + jj].speeds[8]
             - (tmp_cells[ii*params.nx + jj].speeds[3] +
                tmp_cells[ii*params.nx + jj].speeds[6] +
                tmp_cells[ii*params.nx + jj].speeds[7]))
        / local_density;
      /* compute y velocity component */
      u_y = (tmp_cells[ii*params.nx + jj].speeds[2] +
             tmp_cells[ii*params.nx + jj].speeds[5] +
             tmp_cells[ii*params.nx + jj].speeds[6]
             - (tmp_cells[ii*params.nx + jj].speeds[4] +
                tmp_cells[ii*params.nx + jj].speeds[7] +
                tmp_cells[ii*params.nx + jj].speeds[8]))
        / local_density;
      /* velocity squared */
      u_sq = u_x * u_x + u_y * u_y;
      /* directional velocity components */
      u[1] =   u_x;        /* east */
      u[2] =         u_y;  /* north */
      u[3] = - u_x;        /* west */
      u[4] =       - u_y;  /* south */
      u[5] =   u_x + u_y;  /* north-east */
      u[6] = - u_x + u_y;  /* north-west */
      u[7] = - u_x - u_y;  /* south-west */
      u[8] =   u_x - u_y;  /* south-east */
      /* equilibrium densities */
      /* zero velocity density: weight w0 */
      d_equ[0] = w0 * local_density * (1.0 - u_sq / (2.0 * c_sq));
      /* axis speeds: weight w1 */
      d_equ[1] = w1 * local_density * (1.0 + u[1] / c_sq
                                       + (u[1] * u[1]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      d_equ[2] = w1 * local_density * (1.0 + u[2] / c_sq
                                       + (u[2] * u[2]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      d_equ[3] = w1 * local_density * (1.0 + u[3] / c_sq
                                       + (u[3] * u[3]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      d_equ[4] = w1 * local_density * (1.0 + u[4] / c_sq
                                       + (u[4] * u[4]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      /* diagonal speeds: weight w2 */
      d_equ[5] = w2 * local_density * (1.0 + u[5] / c_sq
                                       + (u[5] * u[5]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      d_equ[6] = w2 * local_density * (1.0 + u[6] / c_sq
                                       + (u[6] * u[6]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      d_equ[7] = w2 * local_density * (1.0 + u[7] / c_sq
                                       + (u[7] * u[7]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      d_equ[8] = w2 * local_density * (1.0 + u[8] / c_sq
                                       + (u[8] * u[8]) / (2.0 * c_sq * c_sq)
                                       - u_sq / (2.0 * c_sq));
      /* relaxation step: move each density towards its equilibrium value
      ** by the fraction params.omega and store it in the main grid */
      for(kk=0;kk<NSPEEDS;kk++) {
        cells[ii*params.nx + jj].speeds[kk] = (tmp_cells[ii*params.nx + jj].speeds[kk]
                                               + params.omega *
                                               (d_equ[kk] - tmp_cells[ii*params.nx + jj].speeds[kk]));
      }
    }
  }
}
对于 300x200 的 D2Q9 LB(格子玻尔兹曼)立方体,params.nx = 300,params.ny = 200。上面被我注释掉的 pragma 语句只会让运行时间增加而不是减少,同时还会让计算得到的雷诺数出错。我尝试把两层 for 循环合并成一层,以避免可能的伪共享(false sharing),代码如下:
/* Total number of cells, so the grid can be walked with one flat index. */
c=params.nx*params.ny;
/* NOTE(review): `omp for` is a worksharing construct and does not create
** threads by itself. No enclosing `#pragma omp parallel` region is visible
** in this fragment; if there is none, the loop runs on a single thread, which
** would explain the missing speed-up -- confirm, and use `parallel for` if so.
** NOTE(review): per the OpenMP specification `shared` is a clause of the
** `parallel` construct, not of `for`; an OpenMP-enabled compiler is expected
** to reject it here -- verify that the file is really built with OpenMP.
** jj, kk and d_equ are not referenced in the visible part of this loop. */
#pragma omp for private (ii,jj,kk,d_equ) shared (cells, tmp_cells)
for(ii=0;ii<c;ii++) {
/* Only obstacle cells are processed: the condition is true when
** obstacles[ii] is non-zero. */
if(obstacles[ii]) {
/* called after propagate, so taking values from scratch space
** mirroring, and writing into main grid */
cells[ii].speeds[1] = tmp_cells[ii].speeds[3];
cells[ii].speeds[2] = tmp_cells[ii].speeds[4];
......
....
}
这个 pragma 仍然没能带来任何加速,不过这一次得到的结果是正确的。我从上周六开始就一直在研究这个问题,但始终没有任何进展,在网上也没有找到多少有帮助的资料。非常感谢任何帮助。
答案 0 :(得分:0)
这是我疯狂的猜测:
/* Sketch only (the body is elided): walk the grid with a single flat index
** and declare the per-cell scratch storage inside the loop body instead of
** outside the loop, so that every iteration works on its own variables. */
for(i = 0; i < params.nx * params.ny ; i++) {
double u[NSPEEDS]; /* scratch array, local to one iteration */
double d_equ[NSPEEDS]; /* scratch array, local to one iteration */
...
int kk; /* inner-loop counter, local to one iteration */
}