下面的代码用于计算两个长度为 VECTORSIZE 的向量的点积。
在 VECTORSIZE 达到 10000 之前代码工作正常,但超过之后会产生不相关的结果。
调试程序时,我发现处理器 0(root)在所有处理器发送各自的本地结果之前就完成了自己的工作。
改用 MPI_Reduce()(代码第 2 部分)时,我遇到了同样的情况。
但是,如果我在 MPI_Scatter() 之前使用 MPI_Reduce(),则可以正常工作。
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define VECTORSIZE 10000000
#define ROOT 0
//[[## operation ConstructVectorPart()
/*
 * Fill the first `size` entries of `vector` with one of two repeating
 * 20-element patterns, selected by `vectorEnu`:
 *   1 -> 0.1, 0.2, ..., 2.0, then repeating
 *   2 -> 2.0, 1.9, ..., 0.1, then repeating
 * Any other selector leaves `vector` untouched.
 */
void ConstructVector(double * vector, int size, short vectorEnu)
{
    int k;

    switch (vectorEnu) {
    case 1: /* ascending pattern */
        for (k = 0; k < size; ++k) {
            int phase = k % 20;
            vector[k] = 0.1 + phase * 0.1;
        }
        break;

    case 2: /* descending pattern */
        for (k = 0; k < size; ++k) {
            int phase = k % 20;
            vector[k] = 2 - phase * 0.1;
        }
        break;

    default: /* unknown selector: nothing is written */
        break;
    }
}
//[[## operation dotproduct()
/*
 * Return the sum of a[i] * b[i] for i in [0, length).
 * The products are accumulated in index order; a non-positive
 * `length` yields 0.
 */
double dotproduct(double* a, double* b, int length)
{
    double acc = 0;
    const double *lhs = a;
    const double *rhs = b;
    int remaining;

    for (remaining = length; remaining > 0; --remaining) {
        acc += *lhs++ * *rhs++;
    }

    return acc;
}
int main( argc, argv )
int argc;
char **argv;
{
int processorID, numofProcessors;
int partialVectorSize ;
double t1, t2, localDotProduct, result;
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &numofProcessors );
MPI_Comm_rank( MPI_COMM_WORLD, &processorID );
if(processorID == 0)
t1 = MPI_Wtime();
// all processors constitute their own vector parts and
// calculates corresponding partial dot products
partialVectorSize = VECTORSIZE/ numofProcessors;
double *v1, *v2;
v1 = (double*)(malloc((partialVectorSize) * sizeof(double)));
v2 = (double*)(malloc((partialVectorSize) * sizeof(double)));
ConstructVectorPart(v1,0,partialVectorSize,1);
ConstructVectorPart(v2,0,partialVectorSize,2);
localDotProduct = dotproduct(v1,v2, partialVectorSize);
printf(" I am processor %d \n",processorID);
//----------------- code part 1 ---------------------------------------------
if( processorID != 0 ) // if not a master
{ // send partial result to master
MPI_Send( &localDotProduct, 1, MPI_DOUBLE, 0,0, MPI_COMM_WORLD );
}
else // master
{ // collect results
result = localDotProduct; // own result
int j;
for( j=1; j<numofProcessors; ++j )
{
MPI_Recv( &localDotProduct, 1, MPI_DOUBLE, j, 0, MPI_COMM_WORLD,MPI_STATUS_IGNORE);
result += localDotProduct;
}
t2 = MPI_Wtime();
printf(" result = %f TimeConsumed = %f \n",result, t2-t1);
}
//----------------------------------------------------------------------------
/*
//--------------------- code part 2 ----------------
MPI_Reduce(&localDotProduct, &result, 1, MPI_DOUBLE, MPI_SUM, 0,MPI_COMM_WORLD);
if(processorID == 0)
{
t2 = MPI_Wtime();
printf(" result = %f TimeConsumed = %f \n",result, t2-t1);
}
//---------------------------------------------------
*/
MPI_Finalize();
free(v1);
free(v2);
return 0;
}