I am using Python 3.6 on an i7 with 8 cores to naively solve a differential equation on a grid. The function (shown below) performs a series of vectorized numpy computations (sums, multiplications, and dot products) and uses a for loop to update values at each time step via a running sum.
import numpy as np


def dynamics(cParams, gParams, sParams):
    # takes parameters, performs dynamics, and outputs desired observables
    [P, aIBi] = cParams
    [kcutoff, dk, Ntheta, dtheta, tMax, dt] = gParams
    [mI, mB, n0, gBB] = sParams

    kVec = np.arange(dk, kcutoff, dk)
    thetaVec = np.arange(dtheta, np.pi, dtheta)
    tVec = np.arange(0, tMax, dt)

    # initial conditions
    Bk0_mat = np.zeros((thetaVec.size, kVec.size), dtype=complex)
    Bk0_V = Bk0_mat.reshape(thetaVec.size * kVec.size)
    phi0 = 0 + 0j

    # precomputing things that only depend on k, theta and not t
    Omega0K = omega0_k(kVec, gBB, mI, mB, n0)
    Wkv = Wk(kVec, gBB, mB, n0)
    gnum = g(aIBi, kcutoff, gBB, mI, mB, n0)

    thetaones = np.ones(thetaVec.size)
    Omega0K_mat = np.outer(thetaones, Omega0K)
    Wk_mat = np.outer(thetaones, Wkv)
    dV_mat = (2 * np.pi / (2 * np.pi)**3) * np.outer(dtheta * np.sin(thetaVec), dk * kVec**2)
    kcos_mat = np.outer(np.cos(thetaVec), kVec)

    Omega0K_Vec = Omega0K_mat.reshape(thetaVec.size * kVec.size)
    Wk_Vec = Wk_mat.reshape(thetaVec.size * kVec.size)
    Wki_Vec = 1 / Wk_Vec
    dV_Vec = dV_mat.reshape(thetaVec.size * kVec.size)
    kcos_Vec = kcos_mat.reshape(thetaVec.size * kVec.size)

    # calculate differential equation
    # setting initial beta vector and initializing matrices
    Bkt = Bk0_V
    phit = phi0
    PB_Vec = np.zeros(tVec.size, dtype=float)
    phi_Vec = np.zeros(tVec.size, dtype=complex)
    NB_Vec = np.zeros(tVec.size, dtype=float)

    for ind, t in enumerate(tVec):
        # keep track of quantities we care about (storing data)
        PBt = PB(Bkt, kcos_Vec, dV_Vec, gBB, mB, n0)
        PB_Vec[ind] = PBt
        # print(PBt)
        phi_Vec[ind] = phit
        NBt = np.dot(Bkt * np.conjugate(Bkt), dV_Vec)
        NB_Vec[ind] = NBt
        # print(NBt)

        # calculate some quantities that will be needed later in the loop
        xpt = pchi(Bkt, Wk_Vec, dV_Vec, gBB, mB, n0)
        xmt = mchi(Bkt, Wki_Vec, dV_Vec, gBB, mB, n0)

        # update Bkt and phit to the t+1 value (explicit Euler step)
        BDiff = -1j * (gnum * np.sqrt(n0) * Wk_Vec + Bkt * (Omega0K_Vec - kcos_Vec * (P - PB_Vec[ind]) / mI) + gnum * (Wk_Vec * xpt + Wki_Vec * xmt))
        phiDiff = gnum * n0 + gnum * np.sqrt(n0) * xpt + (P**2 - PB_Vec[ind]**2) / (2 * mI)
        Bkt = Bkt + dt * BDiff
        phit = phit + dt * phiDiff
        # print([PBt, xpt, xmt])

    S_Vec = dynOverlap(NB_Vec, phi_Vec)
    freqVec, A_Vec = spectFunc(S_Vec, tVec)

    # save data
    tfData = [tVec, freqVec]
    paramData = [cParams, gParams, sParams]
    obData = [PB_Vec, NB_Vec, S_Vec, A_Vec]
    data = [paramData, tfData, obData]

    # return data
    # dirpath = os.path.dirname(os.path.realpath(__file__))
    # np.save(dirpath + '/pdata/fmquench_aIBi:%.2f_P:%.2f.npy' % (aIBi, P), data)
    return data
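The helper functions omega0_k, Wk, g, PB, pchi, mchi, dynOverlap and spectFunc are defined elsewhere in the same module and are not shown. For anyone who wants to run or profile the snippet end to end, placeholder stubs along the following lines are enough; they only return arrays of the right shapes and are not the actual physics used in my timings:

import numpy as np

# purely illustrative stand-ins for the real helper functions (not shown above)
def omega0_k(kVec, gBB, mI, mB, n0):
    return kVec**2 / (2 * mB)                     # placeholder dispersion

def Wk(kVec, gBB, mB, n0):
    return np.ones_like(kVec)                     # placeholder weight

def g(aIBi, kcutoff, gBB, mI, mB, n0):
    return 1.0                                    # placeholder coupling constant

def PB(Bkt, kcos_Vec, dV_Vec, gBB, mB, n0):
    return np.real(np.dot(kcos_Vec * np.abs(Bkt)**2, dV_Vec))   # placeholder momentum

def pchi(Bkt, Wk_Vec, dV_Vec, gBB, mB, n0):
    return np.dot(Wk_Vec * Bkt, dV_Vec)           # placeholder integral

def mchi(Bkt, Wki_Vec, dV_Vec, gBB, mB, n0):
    return np.dot(Wki_Vec * Bkt, dV_Vec)          # placeholder integral

def dynOverlap(NB_Vec, phi_Vec):
    return np.exp(-0.5 * NB_Vec + 1j * phi_Vec)   # placeholder dynamical overlap

def spectFunc(S_Vec, tVec):
    A_Vec = np.abs(np.fft.fft(S_Vec))             # placeholder spectral function
    freqVec = np.fft.fftfreq(tVec.size, d=tVec[1] - tVec[0])
    return freqVec, A_Vec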
When I run the function multiple times with different input parameters, pool.starmap is noticeably slower than running the calls serially (with a plain for loop or itertools.starmap). In the example below I call the function 7 times; with the grid parameters I use (not shown), each call takes about 85 seconds serially, roughly 512 seconds in total. When I instead use a pool over all 8 processors, the run takes about 705 seconds. As far as I can tell there is no communication between the different calls and each call does a substantial amount of work, so I don't understand why multiprocessing is slower. What am I doing wrong, what bottlenecks in the code could be causing high IPC overhead, and what is making multiprocessing slower?
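For completeness, cParams_List, gParams and sParams are built roughly like this; the numbers below are placeholders to show the expected structure, not the grid parameters used for the timings above:

import numpy as np

# placeholder values, only to illustrate the shape of the arguments
kcutoff, dk, Ntheta, tMax, dt = 10.0, 0.01, 50, 100.0, 0.01
dtheta = np.pi / Ntheta
mI, mB, n0, gBB = 1.0, 1.0, 1.0, 0.05

gParams = [kcutoff, dk, Ntheta, dtheta, tMax, dt]
sParams = [mI, mB, n0, gBB]

# one (P, aIBi) pair per dynamics() call - 7 calls in the timings quoted above
cParams_List = [[P, -2.0] for P in np.linspace(0.1, 3.0, 7)]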
import itertools as it
import multiprocessing as mp
from timeit import default_timer as timer

# create iterable over all tuples of function arguments for dynamics()
# (materialized as a list, since zip returns a one-shot iterator and the
#  serial timings below would otherwise see it already exhausted)
paramsIter = list(zip(cParams_List, it.repeat(gParams), it.repeat(sParams)))

# compute data (parallel)
start = timer()
with mp.Pool() as pool:
    # pool = mp.Pool()
    pool.starmap(dynamics, paramsIter)
    # pool.close()
    # pool.join()
end = timer()
print(end - start)

# compute data (serial) - for loop
# start = timer()
# for z in paramsIter:
#     dynamics(*z)
# end = timer()
# print(end - start)

# compute data (serial) - starmap
# start = timer()
# for i in it.starmap(dynamics, paramsIter):
#     i
# end = timer()
# print(end - start)
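To separate the time spent inside each dynamics() call from the pool overhead itself, a diagnostic I could run looks like the sketch below (it assumes dynamics, cParams_List, gParams and sParams are defined as above and is not part of the timings quoted earlier). It times each call inside the worker process and compares the sum of the per-call times with the wall-clock time of the pool; returning only the elapsed float also avoids shipping the large result arrays back through IPC, which is one of the costs I am unsure about:

import itertools as it
import multiprocessing as mp
from timeit import default_timer as timer

def timed_dynamics(cParams, gParams, sParams):
    # wrapper that reports how long the call took inside the worker process
    t0 = timer()
    dynamics(cParams, gParams, sParams)
    return timer() - t0

if __name__ == '__main__':
    paramsIter = list(zip(cParams_List, it.repeat(gParams), it.repeat(sParams)))

    start = timer()
    with mp.Pool() as pool:
        perCall = pool.starmap(timed_dynamics, paramsIter)
    wall = timer() - start

    print('wall-clock time (pool): %.1f s' % wall)
    print('sum of per-call times : %.1f s' % sum(perCall))
    print('per-call times        : %s' % ['%.1f' % t for t in perCall])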