I wrote a parallel Jacobi algorithm that solves a sparse linear system. All the non-zero coefficients of the matrix are already stored in vectors using a modified CRS format in which the first element of each row is the diagonal element (a small sketch of this layout follows the main routine below). This is the main routine:
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
#include <cmath>
#include <chrono>
#include <mpi.h>
using namespace std;
using namespace chrono;
using double_vector = vector<double>;
using int_vector = vector<int>;
// Forward declaration of the Jacobi routine shown further below.
// print_vector is a small helper of mine that just prints a vector; its definition is omitted here.
double_vector Jacobi(double_vector& Coefficients, int_vector& RowIndex, int_vector& ColumnIndex,
                     double_vector& IndependentTerm, double_vector& InitialAnswer);

int main() {
    MPI_Init(NULL, NULL);
    int WorldRank;
    MPI_Comm_rank(MPI_COMM_WORLD, &WorldRank);
    int WorldSize;
    MPI_Comm_size(MPI_COMM_WORLD, &WorldSize);

    // Hard-coded 8x8 tridiagonal test system in modified CRS form (diagonal first in every row).
    int Order = 8;
    int Length = 22;
    double_vector Coefficients(Length);
    int_vector RowIndex(Order+1);
    int_vector ColumnIndex(Length);
    double_vector InitialAnswer(Order);
    double_vector IndependentTerm(Order);
    if (WorldRank == 0) {
        Coefficients = {10, 1, 10, 1, 1, 10, 1, 1, 10, 1, 1, 10, 1, 1, 10, 1, 1, 10, 1, 1, 10, 1};
        RowIndex = {0, 2, 5, 8, 11, 14, 17, 20, 22};
        ColumnIndex = {0, 1, 1, 0, 2, 2, 1, 3, 3, 2, 4, 4, 3, 5, 5, 4, 6, 6, 5, 7, 7, 6};
        IndependentTerm = {11, 12, 12, 12, 12, 12, 12, 11};
        InitialAnswer = {5, 5, 5, 5, 5, 5, 5, 5};
    }

    auto Start = steady_clock::now();
    double_vector Answer = Jacobi(Coefficients, RowIndex, ColumnIndex, IndependentTerm, InitialAnswer);
    if (WorldRank == 0) {
        auto End = steady_clock::now();
        duration<double> ElapsedSeconds = End - Start;
        print_vector(Answer, "Answer");
        cout << "\n\tElapsed time: " << setprecision(6) << ElapsedSeconds.count() << "s" << endl << endl;
    }
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}
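To make the storage layout concrete, here is a minimal sketch separate from my program (modified_crs_matvec is just an illustrative name) showing the arrays for a 3x3 version of my test matrix and a matrix-vector product over this modified CRS format:

#include <vector>
using double_vector = std::vector<double>;
using int_vector = std::vector<int>;

// 3x3 example:   [10 1 0; 1 10 1; 0 1 10]
// Coefficients = {10, 1,   10, 1, 1,   10, 1}   <- diagonal stored first in every row
// RowIndex     = {0, 2, 5, 7}
// ColumnIndex  = {0, 1,   1, 0, 2,   2, 1}

// y = A*x for a matrix stored as above: Coefficients[RowIndex[i]] is the diagonal of row i,
// the remaining entries of row i follow it, and ColumnIndex gives the column of each entry.
double_vector modified_crs_matvec(const double_vector& Coefficients, const int_vector& RowIndex,
                                  const int_vector& ColumnIndex, const double_vector& x) {
    int Order = static_cast<int>(RowIndex.size()) - 1;
    double_vector y(Order, 0.0);
    for (int i = 0; i < Order; i++) {
        for (int j = RowIndex[i]; j < RowIndex[i+1]; j++) {
            y[i] += Coefficients[j] * x[ColumnIndex[j]];
        }
    }
    return y;
}

The Jacobi routine below relies on the same convention: Coefficients[RowIndex[i]] is the diagonal it divides by, and the inner loop starting at RowIndex[i]+1 only touches the off-diagonal entries.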
Here is the Jacobi routine:
double_vector Jacobi(double_vector& Coefficients, int_vector& RowIndex, int_vector& ColumnIndex,
                     double_vector& IndependentTerm, double_vector& InitialAnswer) {
    int WorldRank;
    MPI_Comm_rank(MPI_COMM_WORLD, &WorldRank);
    int WorldSize;
    MPI_Comm_size(MPI_COMM_WORLD, &WorldSize);
    int Length = Coefficients.size();
    int Order = IndependentTerm.size();

    // Block-partition the rows: every process gets Order/WorldSize rows,
    // and the first (Order % WorldSize) processes get one extra row.
    int Remainder = Order % WorldSize;
    int Counter = 0;
    int* Displacements = new int[WorldSize];
    int* ElementsPerProcess = new int[WorldSize];
    for (int i=0; i<WorldSize; i++) {
        ElementsPerProcess[i] = Order/WorldSize;
        if (Remainder > 0) {
            ElementsPerProcess[i]++;
            Remainder--;
        }
        Displacements[i] = Counter;
        Counter += ElementsPerProcess[i];
    }
    int iStart = Displacements[WorldRank];
    int iEnd = iStart + ElementsPerProcess[WorldRank];

    // Distribute the system from the root to every process.
    MPI_Bcast(&Coefficients[0], Length, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(&RowIndex[0], Order+1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&ColumnIndex[0], Length, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&IndependentTerm[0], Order, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(&InitialAnswer[0], Order, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    int MaximumIterations = 1000;
    double Summatory;
    double_vector Answer(Order);
    for (int k=0; k<MaximumIterations; k++) {
        // Each process updates only its own block of rows.
        for (int i=iStart; i<iEnd; i++) {
            Summatory = 0.0;
            for (int j=RowIndex[i]+1; j<RowIndex[i+1]; j++) {
                Summatory -= Coefficients[j] * InitialAnswer[ColumnIndex[j]];
            }
            Summatory += IndependentTerm[i];
            Answer[i] = Summatory / Coefficients[RowIndex[i]];
        }
        // Gather every block into InitialAnswer for the next iteration.
        MPI_Allgatherv(&Answer[iStart], ElementsPerProcess[WorldRank], MPI_DOUBLE,
                       &InitialAnswer[0], ElementsPerProcess, Displacements, MPI_DOUBLE, MPI_COMM_WORLD);
    }
    delete[] Displacements;
    delete[] ElementsPerProcess;
    return Answer;
}
My test case is an 8x8 tridiagonal linear system with a strictly diagonally dominant matrix; all the unknowns are equal to 1.0. Running mpirun -n 1 ./main prints the correct answer:
1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
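(As a sanity check: with all unknowns equal to 1.0, the first and last rows give 10*1 + 1*1 = 11 and every interior row gives 1*1 + 10*1 + 1*1 = 12, which matches IndependentTerm = {11, 12, 12, 12, 12, 12, 12, 11}.)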
However, when I run this routine with more than one process, the answer values are not gathered on all processes. That is, if process 0 prints the answer, it shows:
1.000 1.000 1.000 1.000 0.000 0.000 0.000 0.000
and if I have process 1 print the answer, it shows:
0.000 0.000 0.000 0.000 1.000 1.000 1.000 1.000
I have already confirmed that every process receives the data I broadcast correctly. I also believe I am using MPI_Allgatherv correctly; a small sketch of how I understand it follows.
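Here is that sketch, a tiny self-contained program separate from my own code (the variable names are just illustrative): every rank contributes its own block, and the per-rank counts and displacements say where each block lands in the receive buffer on every rank. That is also how ElementsPerProcess and Displacements are built in the Jacobi routine above.

#include <vector>
#include <mpi.h>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int Rank, Size;
    MPI_Comm_rank(MPI_COMM_WORLD, &Rank);
    MPI_Comm_size(MPI_COMM_WORLD, &Size);

    // Each rank owns exactly one value, so all counts are 1 and the displacements are 0, 1, 2, ...
    std::vector<int> Counts(Size, 1), Displacements(Size);
    for (int i = 0; i < Size; i++) Displacements[i] = i;

    double Local = static_cast<double>(Rank);
    std::vector<double> Gathered(Size);
    MPI_Allgatherv(&Local, 1, MPI_DOUBLE,
                   Gathered.data(), Counts.data(), Displacements.data(), MPI_DOUBLE, MPI_COMM_WORLD);
    // After the call, Gathered should hold {0, 1, ..., Size-1} on every rank.

    MPI_Finalize();
    return 0;
}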
MPICH 3.2 - compiler: mpicxx - current flags: -g -std=c++14
Any ideas about what I am doing wrong?
Thanks.