从哪里开始使用mpi4py并行化python代码

时间:2019-06-27 14:04:27

标签: python numpy mpi4py

我正在尝试使用 mpi4py 优化一段力学屈曲(buckling)计算代码,但一直没弄清楚如何把计算分配到多个核上(准确地说是 4 个核)。您能告诉我应该使用 mpi4py 的哪些功能,或者如何用简单的方法优化这个问题吗?谢谢。

我想把 “for” 循环(尤其是第一个和最后一个循环)划分为 4 个不同的块,或者使用 scatter 和 gather 在不同的进程(rank)之间分配计算。

param.dat

10000
1000

主文件


import numpy
import sys
import math
import time

# MPI setup: world communicator, number of processes, and this process's rank.
# NOTE(review): `MPI` is not imported anywhere in the visible code -- presumably
# `from mpi4py import MPI` is missing from the import list above; confirm.
# NOTE(review): `comm`, `size` and `rank` are not used by the rest of the
# visible script, which runs the same serial computation on every process.
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

def secondMembre(N,dt,dx,E, rho, S, I, F, u, SM):
    """Fill SM with the right-hand side and return the maximum wave speed.

    The two first and two last entries of ``u`` and ``SM`` are forced to
    zero.  For every interior index ``i`` in ``[2, N-2)`` the right-hand side
    is ``SM[i] = lin + NL`` with::

        du  = (u[i+1] - u[i-1]) / (2 dx)
        d2u = (u[i+1] - 2 u[i] + u[i-1]) / dx**2
        d4u = (u[i+2] - 4 u[i+1] + 6 u[i] - 4 u[i-1] + u[i-2]) / dx**4
        NL  = -(E/rho) * 0.5 * du * du * d2u
        lin = -(E*I/(rho*S)) * d4u + dx*F

    The stencils are evaluated with numpy slice arithmetic instead of a
    Python-level loop; each element goes through the same sequence of
    floating-point operations as the scalar formulas above.

    Parameters
    ----------
    N : int
        Number of grid points (length of ``u`` and ``SM`` that is used).
    dt : float
        Unused; kept so existing callers do not need to change.
    dx : float
        Grid spacing.
    E, rho, S, I, F : float
        Coefficients of the formulas above.
    u : array of float
        Current state; its four boundary entries are overwritten with 0.
    SM : array of float
        Output array, modified in place.  Must not alias ``u``.

    Returns
    -------
    numpy.float64
        ``max(sqrt(6 E I / (rho S)), sqrt(2 E / rho) * max|u|)``, where the
        second term is dropped when ``max|u| < 1e-14``.
    """
    # Boundary entries of both arrays are pinned to zero.
    u[0] = 0.
    u[1] = 0.
    SM[0]= 0.
    SM[1]= 0.

    u[N-2] = 0.
    u[N-1] = 0.
    SM[N-2]= 0.
    SM[N-1]= 0.

    maxu = 0.
    # With N <= 4 there is no interior point (and the slice bounds below
    # would turn negative for N < 4), so the stencil part is skipped.
    if N > 4:
        w = numpy.asarray(u, dtype=float)
        # Shifted views of u, all of length N-4, aligned on i = 2 .. N-3.
        um2 = w[0:N-4]
        um1 = w[1:N-3]
        u0 = w[2:N-2]
        up1 = w[3:N-1]
        up2 = w[4:N]

        du = (up1 - um1)/(2.0*dx)
        d2u = (up1 - 2.0*u0 + um1)/(dx*dx)
        d4u = (up2 - 4.0*up1 + 6.0*u0 - 4.0*um1 + um2)/(dx*dx*dx*dx)

        NL = -(E/rho)*0.5*du*du*d2u
        lin = -(E*I/(rho*S))*d4u + dx*F

        SM[2:N-2] = lin + NL

        # fmax ignores NaN, like the element-wise fmax accumulation it replaces.
        maxu = numpy.fmax.reduce(numpy.abs(u0), initial=0.)

    clin = math.sqrt((6.0*E*I)/(rho*S))
    cnl  = math.sqrt(2.0*E/rho)*maxu
    if maxu < 1.0e-14:
        cnl = 0

    cm = numpy.fmax(clin, cnl)
    return numpy.fmax(cm, 0.)

def integre(N,dt,u,v,SM):
    """Advance ``v`` and ``u`` in place by one time step of size ``dt``.

    For the first ``N`` entries: ``v += dt*SM`` first, then ``u += dt*v``
    using the freshly updated ``v``.  Both updates are done as whole-slice
    numpy operations rather than an element-by-element Python loop; the
    per-element arithmetic is unchanged.

    Parameters
    ----------
    N : int
        Number of leading entries to update.
    dt : float
        Time step.
    u, v : array of float
        Updated in place.
    SM : array of float
        Right-hand side; read only.

    Returns
    -------
    None
    """
    v[0:N] = numpy.asarray(v[0:N]) + dt*numpy.asarray(SM[0:N])
    # u must see the already-updated v.
    u[0:N] = numpy.asarray(u[0:N]) + dt*numpy.asarray(v[0:N])


def writing(file,N,dx,u):
    """Write the first N values of ``u`` to ``file``, one sample per line.

    Each line holds the abscissa ``(i+0.5)*dx`` followed by ``u[i]``, both
    printed with 12 decimals and separated by a single space.
    """
    row_format = "%.12lf %.12lf\n"
    for idx in range(N):
        abscissa = (idx+0.5)*dx
        file.write(row_format % (abscissa, u[idx]))

# Read the run parameters: first line of param.dat is N, second line is Nt.
# The file is only read, so it is opened read-only and closed as soon as
# both values have been parsed.
with open("param.dat","r") as fparam:
    N=int(fparam.readline())
    Nt=int(fparam.readline())
print("%d %d\n" %(N,Nt))

# Output file for the final state.  It stays open on purpose: the time loop
# further down the script writes to it and closes it.
fres = open("def.dat","w")

# State arrays; numpy.zeros already returns all-zero arrays, so no explicit
# reset loop is needed.
u = numpy.zeros(N)
v = numpy.zeros(N)
SM = numpy.zeros(N)

# Grid spacing and coefficients passed to secondMembre.
dx = 1.0/N
E = 1.0e9
rho = 1.0e3
b = 0.02
h = 0.02
S = b*h
I = b*h*h*h/12.0
F = -1000.0
dt = 1.0e-10

# Dump the initial state; the file is flushed and closed on leaving the block.
# The name `finit` remains bound afterwards, and closing it again is harmless.
with open("def_init.dat","w") as finit:
    writing(finit,N,dx,u)

下面这部分代码是最耗时的,所以我认为应该把这里的循环拆分成不同的块。

# Time loop: each step rebuilds the right-hand side, derives the time step
# from the returned speed (dt = 0.5*dx*dx/cm), then advances v and u.
# try/finally guarantees that the files opened earlier in the script are
# closed even if a step raises.
try:
    for _ in range(Nt):
        cm = secondMembre(N,dt,dx,E,rho,S,I,F,u,SM)
        dt = 0.5*dx*dx/cm
        integre(N,dt,u,v,SM)

    writing(fres,N,dx,u)
finally:
    fres.close()
    finit.close()
    # param.dat was never closed by the script; closing a file that is
    # already closed is a no-op.
    fparam.close()

0 个答案:

没有答案