我在使用 MPI 和 OpenMP 编写矩阵乘法代码时遇到了问题。代码可以正常编译,但结果不正确:在 matmul 函数中,矩阵 c 的值过大;而在 main 中,矩阵 C 甚至得不到 matmul 函数计算出的结果。如果有人知道如何解决,请帮忙。
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <omp.h>
#include <mpi.h>
/*
 * File-level state shared by matmul() and main():
 *   offset, rows  - start index and size of the slice handled by one rank (assigned in matmul)
 *   br_elemenata  - slice size per rank, computed in matmul as N / ukupno rounded up
 *   cvor_id       - loop counter over ranks inside matmul
 *   cvor          - rank of this process (filled by MPI_Comm_rank in main)
 *   ukupno        - number of processes (filled by MPI_Comm_size in main)
 */
int offset,rows,br_elemenata,cvor_id,cvor,ukupno;
/* Status object handed to every MPI_Recv call in matmul. */
MPI_Status status;
/*
 * Current wall-clock time in seconds, as reported by gettimeofday(),
 * with the microsecond part folded into the fraction.
 */
double gettime(void) {
    struct timeval now;
    double fraction;

    gettimeofday(&now, NULL);
    fraction = 1e-6 * now.tv_usec;
    return now.tv_sec + fraction;
}
/*
 * Set every element of the N x N row-major matrix `mat` to `val`.
 * Nothing is written when N <= 0.
 */
void matfill(long N, double *mat, double val) {
    for (long row = 0; row < N; row++) {
        double *line = mat + row * N;   /* start of the current row */
        for (long col = 0; col < N; col++) {
            line[col] = val;
        }
    }
}
/*
 * Matrix product attempt distributed over MPI ranks, with an OpenMP loop inside each rank.
 * Rank 0 sends a slice of `a` and all of `b` to every other rank, each rank runs the
 * triple loop on its slice, and rank 0 receives the slices of `c` back.
 *
 * This is the version the question reports as producing wrong results; the code is
 * left untouched and the suspicious spots are only marked with NOTE(review).
 */
void matmul(long N, double *a, double *b, double *c) {
long i, j, k;
br_elemenata = N / ukupno; // number of elements (rows) per node
if (N % ukupno != 0) br_elemenata++; // round up so that none is left out
if (cvor == 0){
for (cvor_id=1;cvor_id<ukupno;cvor_id++){
offset = cvor_id * br_elemenata;
rows = N - offset;
if (rows > br_elemenata)
rows = br_elemenata;
// send the data from node 0 to the other nodes
MPI_Send(&offset, 1, MPI_INT, cvor_id, 0, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, cvor_id, 0, MPI_COMM_WORLD);
// NOTE(review): a+offset advances by `offset` elements, while rows*N elements are sent;
// if offset is meant as a row index this looks like it should be offset*N - confirm.
MPI_Send(a+offset, rows*N, MPI_DOUBLE, cvor_id, 0, MPI_COMM_WORLD);
MPI_Send(b, N*N, MPI_DOUBLE, cvor_id, 0, MPI_COMM_WORLD);
}
// node 0 keeps the first slice for itself
offset = 0;
rows = br_elemenata;
} else {
// receive the data from node 0 (same order as the sends above)
MPI_Recv(&offset, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(a+offset, rows*N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(b, N*N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
}
MPI_Barrier(MPI_COMM_WORLD);
// NOTE(review): c[i + j] and a[i + k] do not follow the i * N + j layout that matfill
// and b[k * N + j] use, so different (i, j) pairs hit the same element of c - confirm.
#pragma omp parallel for shared(a,b,c) private(i,j,k)
for (i = offset; i < offset + rows; i ++)
for (j = 0; j < N; j ++)
for (k = 0; k < N; k ++)
c[i + j] += a[i + k] * b[k * N + j];
// NOTE(review): i is private to the loop above and is not assigned anywhere else,
// so the index used here is not a defined value.
printf("Clan: %5.2f\n",c[i]);
if (cvor == 0) {
// node 0 collects the result slice of every other node
for (cvor_id = 1; cvor_id < ukupno; cvor_id++) {
MPI_Recv(&offset, 1, MPI_INT, cvor_id, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, cvor_id, 1, MPI_COMM_WORLD, &status);
MPI_Recv(c+offset, rows*N, MPI_DOUBLE, cvor_id, 1, MPI_COMM_WORLD, &status);
}
} else {
// every other node returns its slice description and its part of c
MPI_Send(&offset, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
MPI_Send(c+offset, rows*N, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
}
}
/*
 * Driver: reads the matrix dimension from argv[1], allocates three N x N matrices,
 * fills A with 1.0, B with 2.0 and C with 0.0, times one call to matmul and prints
 * N, the elapsed seconds and the operation rate, followed by C[6].
 */
int main(int argc, char **argv) {
long N;
double *A, *B, *C, t;
MPI_Init(&argc,&argv); // initialize MPI
MPI_Comm_size(MPI_COMM_WORLD,&ukupno); // total number of nodes
MPI_Comm_rank(MPI_COMM_WORLD,&cvor); // rank of this node, which identifies it in communication
if (argc!=2) {
if (cvor==0) printf("Morate unijeti dimenziju matrice!");
MPI_Finalize(); // no argument was given on the command line: end the program
return 1;
}
N = atoi(argv[1]);
A = (double *) malloc(N * N * sizeof(double));
B = (double *) malloc(N * N * sizeof(double));
C = (double *) malloc(N * N * sizeof(double));
matfill(N, A, 1.0);
matfill(N, B, 2.0);
matfill(N, C, 0.0);
t = gettime();
matmul(N, A, B, C);
t = gettime() - t;
// NOTE(review): with the rank check below commented out, every rank prints;
// C[6] is read whatever the value of N is.
// if (cvor == 0){
fprintf(stdout, "%ld\t%le\t%le\n", N, t, (2 * N - 1) * N * N / t);
fflush(stdout);
printf("Clan: %f\n",C[6]);
// }
free(A);
free(B);
free(C);
// NOTE(review): this path returns without calling MPI_Finalize, unlike the argc check above.
return EXIT_SUCCESS;
}
答案 0 :(得分:1)
主要问题在于通信操作中使用的偏移量 `offset`:`offset` 是行号,所以指针偏移应该是 `offset*N`(计算循环里的下标也要相应改成 `c[i*N + j]` 和 `a[i*N + k]`)。
更正后的代码:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <omp.h>
#include <mpi.h>
/*
 * File-level state shared by matmul() and main():
 *   offset, rows  - first row and number of rows of the slab handled by one rank (assigned in matmul)
 *   br_elemenata  - rows per rank, computed in matmul as N / ukupno rounded up
 *   cvor_id       - loop counter over ranks inside matmul
 *   cvor          - rank of this process (filled by MPI_Comm_rank in main)
 *   ukupno        - number of processes (filled by MPI_Comm_size in main)
 */
int offset,rows,br_elemenata,cvor_id,cvor,ukupno;
/* Status object handed to every MPI_Recv call in matmul. */
MPI_Status status;
/*
 * Wall-clock timestamp in seconds: whole seconds from gettimeofday()
 * plus the microsecond field scaled by 1e-6.
 */
double gettime(void) {
    struct timeval stamp;
    double micros_in_seconds;

    gettimeofday(&stamp, NULL);
    micros_in_seconds = 1e-6 * stamp.tv_usec;
    return stamp.tv_sec + micros_in_seconds;
}
/*
 * Fill the N x N row-major matrix `mat` with the constant `val`.
 * Nothing is written when N <= 0.
 */
void matfill(long N, double *mat, double val) {
    long row = 0;

    while (row < N) {
        double *line = mat + row * N;   /* first element of this row */
        long col = 0;

        while (col < N) {
            line[col] = val;
            col++;
        }
        row++;
    }
}
/*
 * Print the N x N row-major matrix `mat` to stdout: one text line per row,
 * each element formatted with %g and followed by a single space.
 */
void matprint(long N, double *mat) {
    for (long row = 0; row < N; row++) {
        const double *line = mat + row * N;   /* first element of this row */

        for (long col = 0; col < N; col++) {
            printf("%g ", line[col]);
        }
        printf("\n");
    }
}
/*
 * Turn the N x N row-major matrix `mat` into a diagonal matrix whose i-th
 * diagonal entry is i (0, 1, 2, ...); every off-diagonal entry becomes 0.
 * The `val` argument is accepted but not used.
 */
void matdiag(long N, double *mat, double val) {
    (void)val;   /* not used by this function */

    for (long row = 0; row < N; row++) {
        double *line = mat + row * N;   /* first element of this row */

        for (long col = 0; col < N; col++) {
            line[col] = (col == row) ? (double)row : 0;
        }
    }
}
/*
 * Distributed matrix product: adds a * b to c for N x N row-major matrices.
 *
 * The rows are cut into contiguous slabs of br_elemenata = ceil(N / ukupno) rows.
 * Rank 0 keeps the first slab, sends every other rank its slab of `a` together
 * with the whole of `b`, and afterwards receives each rank's slab of `c`.
 * Inside a rank the rows of the slab are shared between OpenMP threads.
 *
 * Parameters:
 *   N - matrix dimension; the function returns immediately when N <= 0.
 *   a - left operand, N * N doubles. Read on rank 0; on the other ranks the
 *       slab sent by rank 0 is received into it.
 *   b - right operand, N * N doubles. Read on rank 0; received in full on the
 *       other ranks.
 *   c - result, N * N doubles. The product is ADDED to what c already holds, so
 *       callers zero it first when they want a plain product. Only rank 0 ends
 *       up with every slab.
 *
 * Reads the globals cvor and ukupno; uses offset, rows, br_elemenata, cvor_id
 * and status as working storage. Element counts are passed to MPI as int, so
 * N * N has to fit in an int.
 */
void matmul(long N, double *a, double *b, double *c) {
    long i, j, k;
    long first_row, end_row;

    /* Every rank sees the same N and ukupno, so all of them leave together and
     * no rank is left waiting in a send or receive. ukupno <= 0 would also
     * divide by zero below. */
    if (N <= 0 || ukupno <= 0)
        return;

    /* Rows per rank, rounded up so that no row is left out. */
    br_elemenata = N / ukupno;
    if (N % ukupno != 0)
        br_elemenata++;

    if (cvor == 0) {
        for (cvor_id = 1; cvor_id < ukupno; cvor_id++) {
            offset = cvor_id * br_elemenata;
            /* With more ranks than rows the trailing ranks get nothing. Clamping
             * keeps rows from going negative and keeps a + offset * N from
             * pointing further than one past the end of the matrix. */
            if (offset > N)
                offset = N;
            rows = N - offset;
            if (rows > br_elemenata)
                rows = br_elemenata;

            /* Ship the slab description, the slab of a and all of b. */
            MPI_Send(&offset, 1, MPI_INT, cvor_id, 0, MPI_COMM_WORLD);
            MPI_Send(&rows, 1, MPI_INT, cvor_id, 1, MPI_COMM_WORLD);
            MPI_Send(a + (offset * N), rows * N, MPI_DOUBLE, cvor_id, 2, MPI_COMM_WORLD);
            MPI_Send(b, N * N, MPI_DOUBLE, cvor_id, 3, MPI_COMM_WORLD);
        }
        /* Rank 0 works on the first slab itself. */
        offset = 0;
        rows = br_elemenata;
    } else {
        /* Receive in the order (and with the tags) rank 0 sends. */
        MPI_Recv(&offset, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(&rows, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
        MPI_Recv(a + (offset * N), rows * N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, &status);
        MPI_Recv(b, N * N, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, &status);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    first_row = offset;
    end_row = first_row + rows;

    #pragma omp parallel for shared(a,b,c) private(i,j,k)
    for (i = first_row; i < end_row; i++) {
        for (j = 0; j < N; j++) {
            /* Start from the current value of c and add the terms in k order,
             * which is the same sequence of additions as c[..] += ... */
            double acc = c[i * N + j];
            for (k = 0; k < N; k++)
                acc += a[i * N + k] * b[k * N + j];
            c[i * N + j] = acc;
        }
    }

    /* Debug output. The loop variable is private to the OpenMP loop and has no
     * defined value out here, so print the first element of this rank's slab
     * instead of indexing with it; ranks without rows print nothing. */
    if (rows > 0)
        printf("Clan: %5.2f\n", c[first_row * N]);

    if (cvor == 0) {
        /* Collect the result slab of every other rank. */
        for (cvor_id = 1; cvor_id < ukupno; cvor_id++) {
            MPI_Recv(&offset, 1, MPI_INT, cvor_id, 4, MPI_COMM_WORLD, &status);
            MPI_Recv(&rows, 1, MPI_INT, cvor_id, 5, MPI_COMM_WORLD, &status);
            MPI_Recv(c + (N * offset), rows * N, MPI_DOUBLE, cvor_id, 6, MPI_COMM_WORLD, &status);
        }
    } else {
        /* Return the slab description and the computed part of c to rank 0. */
        MPI_Send(&offset, 1, MPI_INT, 0, 4, MPI_COMM_WORLD);
        MPI_Send(&rows, 1, MPI_INT, 0, 5, MPI_COMM_WORLD);
        MPI_Send(c + (N * offset), rows * N, MPI_DOUBLE, 0, 6, MPI_COMM_WORLD);
    }
}
/*
 * Driver for the distributed product.
 *
 * Usage: <program> N, where N is the matrix dimension (a positive integer).
 * Every rank allocates three N x N matrices, sets A to diag(0, 1, ..., N-1),
 * B to all 2.0 and C to all 0.0, and calls matmul. Rank 0 then prints N, the
 * elapsed seconds, the operation rate and the three matrices.
 *
 * Returns 1 when the argument is missing or invalid, EXIT_SUCCESS otherwise.
 * An allocation failure aborts the whole MPI job.
 */
int main(int argc, char **argv) {
    long N;
    double *A, *B, *C, t;
    char *end = NULL;
    size_t bytes;

    MPI_Init(&argc, &argv);                   /* start MPI */
    MPI_Comm_size(MPI_COMM_WORLD, &ukupno);   /* total number of ranks */
    MPI_Comm_rank(MPI_COMM_WORLD, &cvor);     /* rank of this process */

    if (argc != 2) {
        if (cvor == 0) printf("Morate unijeti dimenziju matrice!");
        MPI_Finalize();   /* no dimension was given: end the program */
        return 1;
    }

    /* strtol instead of atoi so that text, trailing junk and non-positive
     * values are rejected. The size test also rejects dimensions whose byte
     * count does not fit in size_t. All ranks parse the same argument, so
     * they all take the same branch. */
    N = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || N <= 0 ||
        (size_t)N > ((size_t)-1) / sizeof(double) / (size_t)N) {
        if (cvor == 0)
            fprintf(stderr, "Dimenzija matrice mora biti pozitivan cijeli broj!\n");
        MPI_Finalize();
        return 1;
    }

    bytes = (size_t)N * (size_t)N * sizeof(double);
    A = malloc(bytes);
    B = malloc(bytes);
    C = malloc(bytes);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Nema dovoljno memorije za matrice dimenzije %ld!\n", N);
        free(A);
        free(B);
        free(C);
        /* Only this rank may have failed; abort the job so the other ranks
         * are not left blocked inside matmul. */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    matdiag(N, A, 1);     /* writes every element of A, so no separate fill is needed */
    matfill(N, B, 2.0);
    matfill(N, C, 0.0);   /* matmul accumulates into C */

    t = gettime();
    matmul(N, A, B, C);
    t = gettime() - t;

    if (cvor == 0) {
        /* (2N - 1) * N * N operations, computed in double to avoid long overflow. */
        fprintf(stdout, "%ld\t%le\t%le\n", N, t, (2.0 * N - 1.0) * N * N / t);
        fflush(stdout);
        /* C[6] exists only when the matrix has at least 7 elements. */
        if (N * N > 6)
            printf("Clan: %f\n", C[6]);
        printf("A\n");
        matprint(N, A);
        printf("B\n");
        matprint(N, B);
        printf("C\n");
        matprint(N, C);
    }

    free(A);
    free(B);
    free(C);
    MPI_Finalize();
    return EXIT_SUCCESS;
}
编译:`mpicc main.c -o main`(如需启用 OpenMP,请加上 `-fopenmp`);运行:`mpirun -np 4 main`
如果您想更进一步,可以了解一下 `MPI_Bcast()` 函数,它会把相同的内容发送给所有进程。`MPI_Scatter()` 和 `MPI_Gather()` 则可用于把矩阵分发到各个进程,或者把它收集回某个指定的进程。
此外,可以使用 BLAS 的 `dgemm()` 函数来加速单个进程内的计算。
为了减少内存占用,`A` 和 `C` 的分配大小可以减少到 `rows*N`(进程 0 除外)……这样一来偏移量又必须再改一次!