
时间:2017-12-15 09:37:48

标签: c mpi hpc


#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>

/*
 * Compute the elapsed time (end - start) as a normalized timespec.
 *
 * start: earlier time stamp.
 * end:   later time stamp (expected to be >= start).
 *
 * Returns a timespec whose tv_nsec lies in [0, 1e9) when end >= start.
 * When the nanosecond field of `end` is smaller than that of `start`,
 * one second is borrowed so that tv_nsec never goes negative.
 */
struct timespec diff(struct timespec start, struct timespec end){
        struct timespec temp;
        if (end.tv_nsec < start.tv_nsec) {
                /* Borrow one second to keep the nanosecond part non-negative. */
                temp.tv_sec = end.tv_sec - start.tv_sec - 1;
                temp.tv_nsec = 1000000000L + end.tv_nsec - start.tv_nsec;
        } else {
                temp.tv_sec = end.tv_sec - start.tv_sec;
                temp.tv_nsec = end.tv_nsec - start.tv_nsec;
        }
        return temp;
}

int main(int argc, char* argv[])
    struct timespec start_e2e, end_e2e, start_alg, end_alg, e2e, alg;
        /* Should start before anything else */
        clock_gettime(CLK, &start_e2e);

        /* Check if enough command-line arguments are taken in. */
        if(argc < 3) {
                printf( "Usage: %s n p \n", argv[0] );
                return -1;

    MPI_Init(NULL, NULL);

    const int n = atoi(argv[1]);
    const int p = atoi(argv[2]);

    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    int** matA = (int **) malloc(n * sizeof(int *));
    int** matB = (int **) malloc(n * sizeof(int *));
    int** matC = (int **) malloc(n * sizeof(int *));
    int i, j;   
    for(i = 0; i < n; i++)
        matA[i] = (int *) malloc(n * sizeof(int));
        matB[i] = (int *) malloc(n * sizeof(int));
        matC[i] = (int *) malloc(n * sizeof(int));
        for(j = 0; j < n; j++)
            matB[i][j] = 1; // Initialize
            matC[i][j] = 0; // Initialize

    // Total number of processors
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    if(world_rank == 0)
        for(i = 0; i < n; i++)
            for(j = 0; j < n; j++)
                matA[i][j] = 2;
        int destination;
        double start = MPI_Wtime();

        clock_gettime(CLK, &start_alg); /* Start the algo timer */

        for(destination = 1; destination < world_size; destination++) 
            int start = destination * (n / world_size);
            int end = (destination + 1) * (n / world_size);
            if(destination == world_size - 1)
                end = n;
            int offset = start;
            int rows = (end - start);
            MPI_Send(&offset, 1, MPI_INT, destination, 1, MPI_COMM_WORLD); // Send offset
            MPI_Send(&rows, 1, MPI_INT, destination, 2, MPI_COMM_WORLD); // Send number of rows
            MPI_Send(&matA[offset][0], rows * n, MPI_INT, destination, 3, MPI_COMM_WORLD); // Send portion of matrix A  
        double sending = MPI_Wtime();

        // Do matrix multiplication specific to master processor
        int k;
        int rows = n / world_size;
        for(i = 0; i < rows; i++) 
            for(j = 0; j < n; j++) 
                for(k = 0; k < n; k++)
                    matC[i][j] += (matA[i][k] * matB[k][j]);

        // Wait for other processors to complete and combine their results
        double receiving = MPI_Wtime();
        int source;
        for(source = 1; source < world_size; source++) 
            int offset, rows;
            MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // Receive offset
            MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // Receive number of rows
            MPI_Recv(&matC[offset][0], rows * n, MPI_INT, source, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // Receive portion of matrix C
        double end = MPI_Wtime();
        clock_gettime(CLK, &end_alg); /* End the algo timer */
        clock_gettime(CLK, &end_e2e);
            e2e = diff(start_e2e, end_e2e);
            alg = diff(start_alg, end_alg);
        printf("%s,%s,%d,%d,%d,%ld,%d,%ld\n", problem_name, approach_name, n, p, e2e.tv_sec, e2e.tv_nsec, alg.tv_sec, alg.tv_nsec);
        int offset;
        int rows;
        MPI_Recv(&offset, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);     // Receive offset
        MPI_Recv(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);       // Receive number of rows
        MPI_Recv(&matA[offset][0], rows * n, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);  // Receive portion of matrix A

        int k;

        // Do matrix multiplication
        for(i = offset; i < offset + rows; i++) {
            for(j = 0; j < n; j++) {
                for(k = 0; k < n; k++) {
                    matC[i][j] += (matA[i][k] * matB[k][j]);
        MPI_Send(&offset, 1, MPI_INT, 0, 1, MPI_COMM_WORLD); // Send offset
        MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD); // Send number of rows
        MPI_Send(&matC[offset][0], rows * n, MPI_INT, 0, 3, MPI_COMM_WORLD); // Send portion of matrix C
    for(i = 0; i < n; i++) {
    printf("End:%d\n", world_rank);


这是错误跟踪信息(error trace)。很抱歉图像质量较低,我没有其他办法来提取这段跟踪信息。


编辑:预期输出:对两个矩阵 matA 和 matB 做简单的矩阵乘法,结果存储在 matC 中。matA 的所有元素都是 2,matB 的所有元素都是 1,因此 matC 的每个元素都应该是 2n,其中 n×n 是 matA、matB 和 matC 的维度。

编辑:出错的测试用例:对于以下 n(维度)和 p(核心数)的组合,代码会产生段错误(segmentation fault)。我认为它是随机出现的,但为了把问题说得更清楚,列举如下:
 1. n = 2048 p = 12
 2. n = 64 p = 16
 3. n = 1024 p = 28
 4. n = 2048 p = 16,等等

0 个答案:
