Question

我想发送以STL矢量形式存储的矩阵的多个列

    // A: 10 inner vectors of 10 doubles each (the 10x10 matrix discussed below)
    vector < vector < double > > A ( 10, vector <double> (10));

使用Boost MPI，并且不把内容先复制到某个缓冲区（因为这里的计算时间至关重要）。

我发现，用MPI可以做到这一点。下面的示例代码演示了如何从一个进程（rank == 0）向另一个进程（rank == 1）发送10乘10矩阵的第4、第5和第6列。（不过我不知道为什么必须在MPI_Type_vector的第三个参数中加上'2'。有谁知道原因吗？）

    // Demo: rank 0 sends three adjacent columns (starting at column index 4) of a
    // 10x10 float matrix to rank 1 through an MPI derived datatype, so no staging
    // buffer is filled by hand.
    int rank, size;
    MPI_Init(&argc, &argv);                 /* starts MPI */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);   /* get current process id */
    MPI_Comm_size(MPI_COMM_WORLD, &size);   /* get number of processes */

    // fill matrices: A[i][j] = 10*i + j, A_copy is zeroed
    vector< vector <float> > A(10, vector <float> (10));
    vector< vector <float> > A_copy(10, vector <float> (10));
    for (int i = 0; i != 10; i++)
    {
        for (int j = 0; j != 10; j++)
        {
            A[i][j] = j + i*10;
            A_copy[i][j] = 0.0;
        }
    }

    int dest = 1;
    int tag = 1;
    // define new type = three adjacent columns, one block per row
    // NOTE(review): the rows of a vector< vector<float> > are separate heap
    // allocations, so a constant stride between rows is not guaranteed. The
    // extra 2 presumably matches allocator padding on the author's system --
    // confirm on the target platform, or store the matrix contiguously and
    // use a stride of 10.
    MPI_Datatype newtype;
    MPI_Type_vector(10,         /* number of blocks (one per row) */
                    3,          /* elements per block: 3 columns */
                    10+2,       /* stride between block starts, in floats */
                    MPI_FLOAT,  /* elements are float */
                    &newtype);  /* MPI derived datatype */
    MPI_Type_commit(&newtype);

    if (rank == 0)
    {
        MPI_Send(&A[0][4], 1, newtype, dest, tag, MPI_COMM_WORLD);
    }
    if (rank == 1)
    {
        MPI_Status status;
        MPI_Recv(&A_copy[0][4], 1, newtype, 0, tag, MPI_COMM_WORLD, &status);
    }

    // release the committed datatype before shutting MPI down
    MPI_Type_free(&newtype);
    MPI_Finalize();

在Boost网页上，他们声称MPI_Type_vector“在Boost.MPI中自动使用”（http://www.boost.org/doc/libs/1_47_0/doc/html/mpi/tutorial.html#mpi.c_mapping）。

但我找不到一个如何详细说明的例子。只知道如何使用Boost发送整个矩阵或每个元素。

提前谢谢你，

托拜厄斯

Answer 1

我通过编写自己的类'列'并将其序列化来解决问题。这是一个示例代码：

#include<iostream>
#include <stdexcept>
#include<vector>
#include <boost/mpi/environment.hpp>
#include <boost/mpi/communicator.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/complex.hpp>

using namespace std;   
namespace mpi=boost::mpi;

// Non-owning description of a column range [J0, J1) of a matrix stored as a
// vector of row vectors. Only a pointer to the matrix is kept, so the matrix
// must stay alive for as long as this object is used.
class columns
{
public:
    int Nr;   // number of rows in the referenced matrix
    int Nc;   // number of columns (size of the first row; 0 for an empty matrix)
    int J0;   // first column of the range
    int J1;   // one past the last column of the range
    std::vector< std::vector<double> >* matrix;   // referenced matrix, not owned

    // Describe columns [j0, j1) of A.
    columns(std::vector< std::vector<double> >& A, int j0, int j1)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(j0),
          J1(j1),
          matrix(&A)
    {
    }

    // Describe all columns of A: the range defaults to [0, Nc) instead of
    // being left uninitialized.
    columns(std::vector< std::vector<double> >& A)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(0),
          J1(Nc),
          matrix(&A)
    {
    }

    // Empty description: no matrix attached, empty range.
    columns()
        : Nr(0), Nc(0), J0(0), J1(0), matrix(0)
    {
    }
};

namespace boost {
namespace serialization {

    // Non-intrusive Boost.Serialization hook for 'columns'.
    //
    // The archive carries the header (Nr, Nc, J0, J1) followed by the elements
    // of columns [J0, J1) for each of the first Nr rows. The matrix pointer is
    // not transmitted: elements are read from / written into the matrix that
    // g.matrix already refers to.
    //
    // Throws std::out_of_range when the header describes a range that does not
    // fit that matrix, instead of indexing out of bounds.
    template<class Archive>
    void serialize(Archive & ar, columns & g, const unsigned int /*version*/)
    {
        ar & g.Nr;
        ar & g.Nc;
        ar & g.J0;
        ar & g.J1;

        // On load the header comes from the sender, so validate it before it
        // is used to index the local matrix.
        if (g.Nr < 0 || g.J0 < 0 || g.J1 < g.J0)
        {
            throw std::out_of_range("columns: invalid row count or column range");
        }
        if (g.Nr == 0 || g.J0 == g.J1)
        {
            return;   // header only, no elements to transfer
        }
        if (g.matrix == 0 || static_cast<int>(g.matrix->size()) < g.Nr)
        {
            throw std::out_of_range("columns: matrix is missing or has fewer rows than Nr");
        }

        for (int i = 0; i < g.Nr; ++i)
        {
            std::vector<double> & row = (*g.matrix)[i];
            if (static_cast<int>(row.size()) < g.J1)
            {
                throw std::out_of_range("columns: column range exceeds row length");
            }
            for (int j = g.J0; j < g.J1; ++j)
            {
                ar & row[j];
            }
        }
    }
}
}


int main(int argc, char * argv[])
{
mpi::environment env(argc, argv);
mpi::communicator world;
int myid=world.rank();
int NN=world.size();

int Nl=3;
int Ng=5;

int myStart=myid*Ng/NN;
int myEnd=(myid+1)*Ng/NN;
int myN=myEnd-myStart;

if (myid==0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=n+j;
        }
    }

    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "\t";
        }
        cout << endl;
    }
    cout << "############################" << endl;

    // divide grid for parallization
    vector<int> starts(NN);
    vector<int> ends(NN);
    vector<int> Nwork(NN);
    for (int p=0; p!=NN; p++)
    {
        starts[p]=p*Ng/NN;
        ends[p]=(p+1)*Ng/NN;
        Nwork[p]=ends[p]-starts[p];
    }


    vector<columns> input_columns(NN);
    for (int p=1; p!=NN; p++)
    {
        input_columns[p]=columns(input, starts[p], ends[p]);
    }


    for (int p=1; p!=NN; p++)
    {
        world.send(p, 1, input_columns[p]);
    }
}

if (myid!=0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=0.0;
        }
    }

    columns input_columns  = columns(input, myStart, myEnd);

    world.recv(0, 1, input_columns); 


    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "\t";
        }
        cout << endl;
    }
    cout << "############################" << endl;
}
}

说明：'columns'-class包含指向矩阵的指针和两个指示列开始和结束位置的数字。

// Non-owning description of a column range [J0, J1) of a matrix stored as a
// vector of row vectors. Only a pointer to the matrix is kept, so the matrix
// must stay alive for as long as this object is used.
class columns
{
    public:
    int Nr;              // number of rows in the matrix
    int Nc;              // number of columns in the matrix (0 for an empty matrix)
    int J0;              // column start index
    int J1;              // column end index (one past the last column)
    std::vector< std::vector<double> >* matrix;   // referenced matrix, not owned

    // Describe columns [j0, j1) of A.
    columns(std::vector< std::vector<double> >& A, int j0, int j1)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(j0),
          J1(j1),
          matrix(&A)
    {
    }

    // Describe all columns of A: the range defaults to [0, Nc) instead of
    // being left uninitialized.
    columns(std::vector< std::vector<double> >& A)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(0),
          J1(Nc),
          matrix(&A)
    {
    }

    // Empty description: no matrix attached, empty range.
    columns()
        : Nr(0), Nc(0), J0(0), J1(0), matrix(0)
    {
    }
};

使用以下代码，告诉boost-serialization如何序列化这些'columns'-class：

namespace boost {
    namespace serialization {

            // Non-intrusive Boost.Serialization hook for 'columns'.
            //
            // The archive carries the header (Nr, Nc, J0, J1) followed by the
            // elements of columns [J0, J1) for each of the first Nr rows. The
            // matrix pointer is not transmitted: elements are read from /
            // written into the matrix that g.matrix already refers to.
            //
            // Throws std::out_of_range when the header describes a range that
            // does not fit that matrix, instead of indexing out of bounds.
            template<class Archive>
            void serialize(Archive & ar, columns & g, const unsigned int /*version*/)
            {
                    ar & g.Nr;
                    ar & g.Nc;
                    ar & g.J0;
                    ar & g.J1;

                    // On load the header comes from the sender, so validate it
                    // before it is used to index the local matrix.
                    if (g.Nr < 0 || g.J0 < 0 || g.J1 < g.J0)
                    {
                            throw std::out_of_range("columns: invalid row count or column range");
                    }
                    if (g.Nr == 0 || g.J0 == g.J1)
                    {
                            return;   // header only, no elements to transfer
                    }
                    if (g.matrix == 0 || static_cast<int>(g.matrix->size()) < g.Nr)
                    {
                            throw std::out_of_range("columns: matrix is missing or has fewer rows than Nr");
                    }

                    for (int i = 0; i < g.Nr; ++i)
                    {
                            std::vector<double> & row = (*g.matrix)[i];
                            if (static_cast<int>(row.size()) < g.J1)
                            {
                                    throw std::out_of_range("columns: column range exceeds row length");
                            }
                            for (int j = g.J0; j < g.J1; ++j)
                            {
                                    ar & row[j];
                            }
                    }
            }
    }
}

然后填充矩阵'输入'

vector < vector <double> > input (Nl, vector <double>(Ng));
            for (int n=0; n!=Nl; n++)
            {
                    for (int j=0; j!=Ng; j++)
                    {
                            input[n][j]=n+j;
                    }
            }

并初始化一个列类对象（现在包含指向矩阵'input'的指针）：

vector<columns> input_columns(NN)

并通过以下调用将其发送到另一个（子）进程：

// Send rank p its own 'columns' object (tag 1); the receiver expects a single
// 'columns', not the whole vector.
world.send(p, 1, input_columns[p]);

最后通过以下调用接收：

// Receive from rank 0 (tag 1) into a single 'columns' object; deserialization
// writes the elements into the matrix that object points to.
world.recv(0, 1, input_columns);

Answer 2

如果要对A执行大量列操作，可能应该存储A的转置而不是A。这样列就位于连续的内存位置，这意味着您可以使用MPI_Send直接发送列而无需执行任何复制操作。此外，列操作也会更快。

如何使用C++ STL向量通过Boost MPI发送矩阵的列？

2 个答案: