我正在尝试使用MPI执行高斯消除,其中我很新。据我所知,前向消除是可以并行完成的,而后向替换可以在主处理器上完成,因为通信成本。
在下面的代码中,我试图实现它,但似乎我在执行前向消除时会出现某种通信错误。当输入矩阵作为增广矩阵读取时,输入矩阵为N *(N + 1)。每个处理器都应该读取它的矩阵份额,因此处理器需要相应地发送值。
输入矩阵指定为以下格式的文件:
6.807000 5.249000 0.073000 3.658000 8.930000 1.272000 7.544000 0.878000 1.000000
7.709000 4.440000 8.165000 4.492000 3.042000 7.987000 2.503000 2.327000 2.000000
8.840000 2.612000 4.303000 3.169000 7.709000 7.157000 9.560000 0.933000 3.000000
0.278000 1.816000 5.335000 9.097000 7.826000 3.512000 9.267000 3.810000 4.000000
0.979000 9.149000 6.579000 8.821000 1.967000 0.672000 1.393000 9.336000 5.000000
1.745000 5.228000 4.091000 0.194000 6.357000 5.001000 1.153000 6.708000 6.000000
5.668000 1.490000 8.124000 2.196000 9.530000 0.903000 7.722000 4.666000 7.000000
8.024000 7.801000 6.853000 0.977000 7.408000 8.228000 4.933000 0.298000 8.000000
结果矩阵应该是这样的:
1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -0.733315
0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.806801
0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.843402
0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 -0.912938
0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 -0.146581
0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 -0.048743
0.000000 0.000000 0.000000 0.000000 0.000000 -0.000000 1.000000 0.000000 0.849379
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 -0.002284
然而,我得到的是:
1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -nan
0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -nan
7.431000 2.624000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -nan
0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 -nan
3.714586 -0.915696 -1.032000 -5.928000 1.000000 0.000000 0.000000 0.000000 -nan
0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 -nan
-8.653789 -0.773373 1.057501 6.074484 0.000000 0.000000 1.000000 0.000000 -nan
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 -nan
任何人都可以提供一些有关出错的见解吗?我想使用障碍来确保处理器不会启动,直到所有进程都从文件中读取了它们的矩阵份额。在正向消除之后还有另一个障碍,以确保在执行后向替换之前所有处理器都已完成。是否需要这些障碍?
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include "math.h"
#include "mpi.h"
#define SUCCESS 0
#define ERROR -1
#define EPSILON 0.000001
void read_matrix_size_from_file(char *filename, int *rows, int *columns);
double ** read_user_matrix_from_file(char *filename, int rows, int columns, int rank, int nprocs);
double ** allocate_matrix(int, int);
void free_matrix(double **matrix, int rows);
void divide_by_max(double **, int, int);
void input_clicking_probabilities(double **, int, int, double *);
void write_clicking_probabilities_to_file(double *cp, int rows);
void print_best_acceptance_threshold(double *, int);
void print_matrix(double **, int, int);
int main (int argc, char** argv)
{
if (argc != 2)
{
printf("please provide a user matrix!\n");
return ERROR;
}
/* init */
int rank, nprocs;
MPI_Init( &argc, &argv);
MPI_Comm_rank( MPI_COMM_WORLD, &rank);
MPI_Comm_size( MPI_COMM_WORLD, &nprocs);
clock_t begin1, end1, begin2, end2;
/* setup */
int rows, columns;
double **A;
double *cp;
if (rank == 0)
{
printf("malloc cp process %d\n", rank);
read_matrix_size_from_file(argv[1], &rows, &columns);
cp = malloc(columns * sizeof(double));
}
/* Each process will read its own subset of the larger matrix */
MPI_Bcast(&rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&columns, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* Split the matrix into submatrixs */
//int rpp = rows / rank;
//int rrpp = rows % rank;
printf("WE reached here process %d\n", rank);
int i,j,k;
int *map = malloc(sizeof(int) * rows);
int stop = 0;
while(1)
{
for(i=0;i<nprocs; i++)
{
if (rank == i)
{
printf("Reading for process %d\n", rank);
A = read_user_matrix_from_file(argv[1], rows, columns, rank, nprocs);
stop = 1;
}
}
if (stop)
{
break;
}
}
MPI_Barrier(MPI_COMM_WORLD);
for (i = 0; i < rows; i++)
{
map[i] = i % nprocs;
}
printf("Process %d is here\n", rank);
for(k=0; k<rows; k++)
{
//printf("Broadcast recieved/sent in process %d from %d\n", rank, map[k]);
MPI_Bcast (&A[k][k],rows-k,MPI_DOUBLE,map[k],MPI_COMM_WORLD);
printf("process %d, broadcast b[%d] as %G\n", rank,k,A[k][columns-1] );
MPI_Bcast (&A[k][columns-1],1,MPI_DOUBLE,map[k],MPI_COMM_WORLD);
double pivot;
for(i= k+1; i<rows; i++)
{
if(map[i] == rank)
{
pivot = A[i][k]/A[k][k];
}
}
for(i= k+1; i<rows; i++)
{
if(map[i] == rank)
{
for(j=0;j<rows;j++)
{
A[i][j]=A[i][j]-( pivot * A[k][j] );
}
A[i][columns-1]= A[i][columns-1]-( pivot * A[k][columns-1] );
printf("b[%d] is %G\n", i, A[i][columns-1]);
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
printf("Process %d finished\n", rank);
if (rank == 0)
{
/* Back-substitution */
int row,row2;
for (row = rows-1; row >= 0; row--) {
A[row][columns-1] = A[row][columns-1] / A[row][row];
printf("divide %G by %G\n",A[row][columns-1] , A[row][row]);
A[row][row] = 1;
for (row2 = row-1; row2 >= 0; row2--) {
A[row2][columns-1] += A[row2][row]*A[row][columns-1];
A[row2][row] = 0;
}
}
}
if (rank == 0)
{
print_matrix(A, rows, columns);
divide_by_max(A, rows, columns);
/* results */
input_clicking_probabilities(A, rows, columns, cp);
print_best_acceptance_threshold(cp, rows);
write_clicking_probabilities_to_file(cp, rows);
printf("Free cp process %d\n", rank);
free(cp);
printf("Freed cp\n");
}
/* results */
free_matrix(A, rows);
MPI_Finalize();
return SUCCESS;
}
void read_matrix_size_from_file(char *filename, int *rows, int *columns)
{
FILE *file;
file = fopen(filename, "r");
/* get number of rows and columns*/
*rows = 1;
*columns = 1;
char c;
int columns_known = 0;
while(!feof(file)) {
c = fgetc(file);
if (c == ' ') {
if (!columns_known) (*columns)++;
}
if (c == '\n') {
(*rows)++;
columns_known = 1;
continue;
}
}
printf("There are %d rows and %d columns\n", *rows, *columns);
fclose(file);
}
double ** read_user_matrix_from_file(char *filename, int rows, int columns, int rank, int nprocs)
{
FILE *file;
file = fopen(filename, "r");
/* read values into an array */
//rewind(file);
printf("Rank is %d\n", rank);
printf("Nprocess is %d\n", nprocs);
printf("Rows is %d and columns is %d\n", rows, columns);
double **matrix = allocate_matrix(rows, columns);
int i,j;
for (i = 0; i < rows; i++)
{
if (rank == i % nprocs)
{
for (j = 0; j < columns; j++)
{
fscanf(file, "%lf", &matrix[i][j]);
}
}
}
fclose(file);
return matrix;
}
double ** allocate_matrix(int rows, int columns)
{
double ** matrix = (double **) malloc(sizeof(double *) * rows);
int i;
for (i = 0; i < rows; i++)
{
matrix[i] = (double *) malloc(sizeof(double) * columns);
}
return matrix;
}
void free_matrix(double **matrix, int rows)
{
int i;
for (i = 0; i < rows; i++)
{
free(matrix[i]);
}
free(matrix);
}
void input_clicking_probabilities(double **matrix, int rows, int columns, double *cp) {
int row;
for (row = 0; row < rows; row++) {
cp[row] = matrix[row][columns-1];
}
}
void write_clicking_probabilities_to_file(double *cp, int rows)
{
/* write clicking probabilities to file */
FILE *output_file;
int row;
output_file = fopen("clicking_probabilities.txt","w");
for (row = 0; row < rows; row++) {
fprintf(output_file, "%lf\n", cp[row]);
}
fclose(output_file);
}
void print_matrix(double **matrix, int rows, int columns)
{
FILE *output_file;
int row, column;
output_file = fopen("row_reduced_matrix.txt","w");
for (row = 0; row < rows; row++) {
for (column = 0; column < columns; column++) {
fprintf(output_file,"%lf ",matrix[row][column]);
}
fprintf(output_file,"\n");
}
fclose(output_file);
}
void print_best_acceptance_threshold(double *cp, int rows) {
}
void divide_by_max(double **matrix, int rows, int columns) {
double max = 0;
int row, column;
/* get max so we can divide by this later to get probabilities */
for (row = 0; row < rows; row++) {
if (max < fabs(matrix[row][columns-1])) max = fabs(matrix[row][columns-1]);
}
/* divide by max and take abs */
for (row = 0; row < rows; row++) {
/* check for division by zero */
if (equals(max,0)) {
matrix[row][columns-1] = 0;
} else {
matrix[row][columns-1] = fabs (matrix[row][columns-1]) / max;
}
}
}
int equals(double a, double b) {
if (fabs(a-b) < EPSILON) return 1;
else return 0;
}
答案 0 :(得分:0)
上述结果(NAN)的原因是从文件中读取。在read_user_matrix_from_file函数中,有一个带有fscanf的for循环。请注意,递增循环会使 NOT 增加文件指针。通过读取行(而不是文件指针)来增加文件指针来解决问题。可能有一种更有效的方法,但为了简单起见,这就是可以做到的。另请注意,由于MPI_Bcast是阻塞调用,因此不需要障碍。从文件中读取的循环应如下所示:
for (i = 0; i < rows; i++)
{
printf("process %d, %d mod %d = %d\n", world_rank, i, nprocs, i% nprocs);
if (world_rank == i % nprocs)
{
for (j = 0; j < columns; j++)
{
fscanf(file, "%lf", &matrix[i][j]);
}
}else {
for ( j= 0; j < columns; j++)
{
fscanf(file, "%lf", &buf[j]);
matrix[i][j] = 0;
}
}
}