MPI dot product with point-to-point operations fails when the data is big

Asked: 2014-03-24 19:39:16

Tags: mpi

In the code below I compute the dot product of two vectors of size VECTORSIZE. The code works fine up to roughly VECTORSIZE = 10000, but beyond that it produces incorrect results. When I debug the program, I see that processor 0 (the root) finishes its work before all the other processors have sent their local results. I run into the same situation when I use MPI_Reduce() (code part 2). However, it is fine if I use MPI_Reduce() before MPI_Scatter().

#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

#define VECTORSIZE 10000000
#define ROOT 0


// Fill one part of a test vector; start is the global index of the first
// element of this part, vectorEnu selects which of the two vectors to build.
void ConstructVectorPart(double * vector, int start, int size, short vectorEnu)
{
    int i = 0;
    if(vectorEnu == 1) // i.e. vector 1
    {
        for(i = 0; i < size; i++)
        {
            vector[i] = 0.1 + ((start + i) % 20) * 0.1;
        }
    }
    else if(vectorEnu == 2) // i.e. vector 2
    {
        for(i = 0; i < size; i++)
        {
            vector[i] = 2 - ((start + i) % 20) * 0.1;
        }
    }
}

// Dot product of two local arrays of the given length.
double dotproduct(double* a, double* b, int length)
{
    double result = 0;
    int i = 0;

    for (i = 0; i < length; i++)
        result += a[i] * b[i];

    return result;
}

int main(int argc, char **argv)
{
  int processorID, numofProcessors;
    int partialVectorSize ;
    double t1, t2, localDotProduct, result;

  MPI_Init( &argc, &argv );
  MPI_Comm_size( MPI_COMM_WORLD, &numofProcessors );
  MPI_Comm_rank( MPI_COMM_WORLD, &processorID );

    if(processorID == 0)
        t1 = MPI_Wtime();

    // every process constructs its own vector parts and
    // computes the corresponding partial dot product
    partialVectorSize = VECTORSIZE/ numofProcessors;


    double *v1, *v2;
    v1 = (double*)(malloc((partialVectorSize) * sizeof(double)));
    v2 = (double*)(malloc((partialVectorSize) * sizeof(double)));

    ConstructVectorPart(v1,0,partialVectorSize,1);
    ConstructVectorPart(v2,0,partialVectorSize,2);

    localDotProduct = dotproduct(v1,v2, partialVectorSize);

    printf(" I am processor %d \n",processorID);

//----------------- code part 1 ---------------------------------------------   
    if( processorID != 0 ) // if not a master
    { // send partial result to master
        MPI_Send( &localDotProduct, 1, MPI_DOUBLE, 0,0, MPI_COMM_WORLD );
    }
    else // master
    {  // collect results
      result = localDotProduct; // own result
        int j;
        for( j=1; j<numofProcessors; ++j )
        {
            MPI_Recv( &localDotProduct, 1, MPI_DOUBLE, j, 0, MPI_COMM_WORLD,MPI_STATUS_IGNORE);
            result += localDotProduct;
        }
        t2 = MPI_Wtime();
        printf(" result = %f  TimeConsumed = %f \n",result, t2-t1);
    }
 //----------------------------------------------------------------------------


    /*
   //--------------------- code part 2 ----------------
  MPI_Reduce(&localDotProduct, &result, 1, MPI_DOUBLE, MPI_SUM, 0,MPI_COMM_WORLD);

    if(processorID == 0)
    {
            t2 = MPI_Wtime();
            printf(" result = %f  TimeConsumed = %f \n",result, t2-t1);
    }
    //---------------------------------------------------
    */

    free(v1);
    free(v2);

    MPI_Finalize();

    return 0;
}
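
For clarity, here is a minimal, self-contained sketch of the MPI_Reduce-based collection I refer to above as code part 2. The vector construction is replaced by a stand-in per-rank value, so only the reduction step is shown; the MPI calls are the same ones used in the full program.

#include <stdio.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int rank, numofProcessors;
    double localValue, globalSum;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numofProcessors);

    // Stand-in for the partial dot product each rank would compute.
    localValue = (double)rank + 1.0;

    // Sum all partial results into globalSum on rank 0.
    MPI_Reduce(&localValue, &globalSum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0)
        printf("global sum = %f (expected %f)\n",
               globalSum, numofProcessors * (numofProcessors + 1) / 2.0);

    MPI_Finalize();
    return 0;
}

I build and run this the same way as the full program, e.g. with mpicc and mpirun -np 4.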

0 Answers:

No answers yet.