我正在尝试使用 mpi4py 优化一段力学屈曲(buckling)计算代码,但一直没弄清楚如何把计算分配到多个核上(确切地说是 4 个核)。请问 mpi4py 中有哪些功能可以用,或者有什么简单的办法可以优化这个问题?谢谢。
我想把 for 循环(尤其是第一个和最后一个循环)划分成 4 个不同的块,或者使用 scatter 和 gather(分发与收集)在不同的进程(rank)之间分配计算。
param.dat
10000
1000
主文件
import numpy
import sys
import math
import time
# MPI setup: world communicator, number of processes, and this process's rank.
# NOTE(review): `MPI` is not imported anywhere in the visible code; this
# presumably needs `from mpi4py import MPI` alongside the other imports.
# NOTE(review): `size` and `rank` are not used by the visible code below, so
# every process would currently run the full computation.
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
def secondMembre(N,dt,dx,E, rho, S, I, F, u, SM):
    """Fill ``SM`` with the right-hand side for ``u`` and return a speed bound.

    The two cells at each end of ``u`` and ``SM`` are forced to zero.  For
    every interior cell ``i`` in ``2 .. N-3`` the centred differences

        du  = (u[i+1] - u[i-1]) / (2 dx)
        d2u = (u[i+1] - 2 u[i] + u[i-1]) / dx**2
        d4u = (u[i+2] - 4 u[i+1] + 6 u[i] - 4 u[i-1] + u[i-2]) / dx**4

    are combined as
    ``SM[i] = -(E*I/(rho*S))*d4u + dx*F - (E/rho)*0.5*du*du*d2u``.

    Args:
        N: number of cells; ``u`` and ``SM`` must hold at least ``N`` entries
            and ``N`` must be at least 2 for the boundary writes to succeed.
        dt: unused; kept so existing callers keep working.
        dx: cell size.
        E, rho, S, I, F: scalar coefficients used in the formulas above.
        u: displacement array, modified in place (boundary cells zeroed).
        SM: output array, overwritten in place.

    Returns:
        ``max(clin, cnl)`` where ``clin = sqrt(6*E*I/(rho*S))`` and
        ``cnl = sqrt(2*E/rho) * max|u[i]|`` over the interior cells
        (``cnl`` is treated as 0 when that maximum is below 1e-14).
    """
    # Clamp the two outermost cells on each side.
    u[0] = 0.
    u[1] = 0.
    SM[0] = 0.
    SM[1] = 0.
    u[N-2] = 0.
    u[N-1] = 0.
    SM[N-2] = 0.
    SM[N-1] = 0.

    maxu = 0.
    if N > 4:  # with N <= 4 there are no interior cells to update
        # One vectorised sweep replaces the per-cell Python loop.  The
        # arithmetic keeps the same operation order per element.
        w = numpy.asarray(u, dtype=float)
        centre = w[2:N-2]
        right1 = w[3:N-1]
        left1 = w[1:N-3]
        right2 = w[4:N]
        left2 = w[0:N-4]

        du = (right1 - left1)/(2.0*dx)
        d2u = (right1 - 2.0*centre + left1)/(dx*dx)
        d4u = (right2 - 4.0*right1 + 6.0*centre - 4.0*left1 + left2)/(dx*dx*dx*dx)

        NL = -(E/rho)*0.5*du*du*d2u
        lin = -(E*I/(rho*S))*d4u + dx*F
        SM[2:N-2] = lin + NL

        # fmax ignores NaN operands, matching the element-wise running max.
        maxu = numpy.fmax.reduce(numpy.abs(centre), initial=0.)

    # max|u| only grows during the sweep, so evaluating the bound once with
    # the final maximum equals taking a running maximum of it per cell.
    clin = math.sqrt((6.0*E*I)/(rho*S))
    cnl = math.sqrt(2.0*E/rho)*maxu
    if maxu < 1.0e-14:
        cnl = 0
    return numpy.fmax(clin, cnl)
def integre(N,dt,u,v,SM):
    """Advance ``v`` and then ``u`` by one time step, in place.

    For the first ``N`` cells: ``v += dt*SM`` followed by ``u += dt*v``, so
    the displacement update uses the already-updated velocity.

    Args:
        N: number of leading cells to update; nothing happens when ``N <= 0``.
            ``N`` is expected not to exceed the length of the arrays.
        dt: time step.
        u: displacement array, updated in place.
        v: velocity array, updated in place.
        SM: right-hand side array (read only).
    """
    if N <= 0:
        return
    cells = slice(0, N)
    # Whole-slice updates replace the per-cell Python loop; v must be
    # written first because the u update reads the new v.
    v[cells] = numpy.asarray(v[cells]) + dt*numpy.asarray(SM[cells])
    u[cells] = numpy.asarray(u[cells]) + dt*numpy.asarray(v[cells])
def writing(file,N,dx,u):
    """Write one ``"<position> <value>"`` line per cell to ``file``.

    The position of cell ``k`` is its centre, ``(k + 0.5) * dx``; both
    numbers are printed with 12 decimals.

    Args:
        file: object with a ``write(str)`` method.
        N: number of cells to write.
        dx: cell size.
        u: indexable sequence holding at least ``N`` values.
    """
    row_format = "%.12lf %.12lf\n"
    for cell in range(N):
        centre = (cell+0.5)*dx
        file.write(row_format % (centre, u[cell]))
# Read the problem size before touching any output file, so a missing or
# malformed param.dat does not leave truncated result files behind.
# The file is only read, so plain read mode is enough, and the context
# manager closes it again.
with open("param.dat", "r") as fparam:
    N = int(fparam.readline())   # first line: number of cells
    Nt = int(fparam.readline())  # second line: number of time steps
print("%d %d\n" %(N,Nt))

# numpy.zeros already returns zero-filled arrays, so no explicit
# initialisation loop is needed.
u = numpy.zeros(N)
v = numpy.zeros(N)
SM = numpy.zeros(N)

dx = 1.0/N
# NOTE(review): names suggest E = Young's modulus and rho = density; units
# are not stated in this file -- confirm.
E = 1.0e9
rho = 1.0e3
# S = b*h and I = b*h^3/12 are the area and second moment of area of a
# b-by-h rectangle.
b = 0.02
h = 0.02
S = b*h
I = b*h*h*h/12.0
F = -1000.0
dt = 1.0e-10

# Dump the initial state; the file is complete after this single write, so
# it is closed right away (a later finit.close() is a harmless no-op).
with open("def_init.dat", "w") as finit:
    writing(finit,N,dx,u)

# Result file stays open: it is written and closed after the time loop.
fres = open("def.dat", "w")
代码的下一部分是最耗时的,所以我认为应该把这里的循环拆分成不同的块。
# Time integration: each step rebuilds the right-hand side, derives the next
# time step from the returned speed bound (dt = 0.5*dx^2/cm), then advances
# v and u.  The final state is written once after the last step.
# try/finally makes sure the output files are closed even if a step raises.
try:
    for it in range(0,Nt):
        cm = secondMembre(N,dt,dx,E,rho,S,I,F,u,SM)
        dt = 0.5*dx*dx/cm
        integre(N,dt,u,v,SM)
    writing(fres,N,dx,u)
finally:
    fres.close()
    finit.close()