/*
Includes
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h> /* Needed for sqrt(); compile with -lm. */
#include <mpi.h>
int main( int argc, char **argv )
{
int i, j, numprocs, rank, rowsPerProc;
double *x=NULL, *A=NULL, *b=NULL, *local_i, *global_i;
/*
Initialisation
*/
/* Start up MPI */
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &numprocs );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
int N = 128;
rowsPerProc = N / numprocs;
/* Allocate the arrays */
x = (double*) malloc( sizeof(double)*N);
b = (double*) malloc( sizeof(double)*N);
local_i = (double*) malloc( sizeof(double)*rowsPerProc);
if( rank==0 ) /* Rank 0 also stores the matrix A array */
{
global_i = (double*) malloc( sizeof(double)*N);
A = (double*) malloc( sizeof(double)*N*N );
}
for( i=0; i<N; i++ )
{
b[i] = 1.0;
x[i] = 0.0;
for( j=0; j<N; j++ )
{
A[i*N+j] = 0.0; /* A[i*N+j] = A[i][j] in the row-by-row allocation model */
if( i==j ) A[i*N+j] = - 2.0; /* Diagonal elements */
if( i==j+1 || i==j-1 ) A[i*N+j] = 1.0; /* Adjacent to the diagonal */
}
}
/* Synchronise x and b (currently only defined on rank==0) between all processes */
MPI_Bcast( x, N, MPI_DOUBLE, 0, MPI_COMM_WORLD );
MPI_Bcast( b, N, MPI_DOUBLE, 0, MPI_COMM_WORLD );
// Each strip has rowsPerProc = N/numprocs rows
MPI_Scatter(A, N*rowsPerProc, MPI_DOUBLE, b, N*rowsPerProc, MPI_DOUBLE, 0, MPI_COMM_WORLD);
/*
Main iteration loop.
*/
double convTol = 1e-4; /* Convergence tolerance */
printf("convTol %g.\n", convTol);
int maxIters = 10000; /* If too many iterations taken, quit with fail message */
double errorSumSqrd, old_x;
int iters = 0;
do
{
/* Set the error to zero; will update as we go along */
errorSumSqrd = 0.0;
/* Loop over 'red' and 'black' rows of A */
int redBlack;
for( redBlack=0; redBlack<2; redBlack++ ) /* i.e. 0 for red, 1 for black */
{
/* Loop over all red or black rows in the matrix. */
for( i=0; i<N; i++ )
if( i%2 == redBlack )
{
/* Calculate the new value of x corresponding to this matrix row */
double sum = 0.0;
for( j=0; j<N; j++ ) /* j is the column index */
if( j != i ) /* Loop over all non-diagonal elements */
sum += A[i*N+j] * x[j];
//sum = (i>0?A[i*N+i-1]*x[i-1]:0.0)+(i<N-1?A[i*N+i+1]*x[i+1]:0.0);
old_x = x[i];
//global_i = local_i + rank*rowsPerProc;
//printf("%f\n",global_i );
/* Complete the Gauss-Seidel calculation. */
x[i] = ( b[i] - sum ) / A[i*N+i]; /* A[i*N+i]=A[i][i] is the diagonal element for row/column i */
/* Update the error; sum of the squares of the change in x[i] */
errorSumSqrd += ( x[i] - old_x )*( x[i] - old_x );
}
MPI_Gather( local_i, rowsPerProc, MPI_DOUBLE, global_i, rowsPerProc, MPI_DOUBLE, 0, MPI_COMM_WORLD );
} /* End of 'redBlack' loop, i.e. a single, complete iteration */
/* Update the number of iterations; if 'too large' (suggesting it is not converging), quit */
if( ++iters == maxIters ) /* ++iters means: add one to iters, then check against maxIters */
{
printf( "Iteration converged after %i iterations; final error was %g.\n", iters, sqrt(errorSumSqrd/N) );
//printf( "Max. iters %i reached.\n", maxIters );
return EXIT_SUCCESS;;
}
/* The final error is the square-root of errorSumSqrd divided by the problem size N */
} while( sqrt(errorSumSqrd/N) > convTol );
/*
Clear up [one free() for each malloc()], and quit
*/
free( A );
free( b );
free( x );
MPI_Finalize();
return EXIT_SUCCESS;
}
我正在使用正确的MPI程序,但我无法在串行和并行实现之间进行转换。我认为这一行:
x[i] = ( b[i] - sum ) / A[i*N+i];
应该只考虑每个条带分区的局部对角线元素。后来聚集了。我不明白如何做到这一点。有人可以对此有所了解吗?