Question

我正在开发一个程序，它本质上是在执行N体模拟，但包含多个势（potential），并且很快还会加入随时间变化的势。目前该程序运行非常缓慢，我导师的代码比它快600倍——不过这也在意料之中，因为那是用C语言编写的。

import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as la
from scipy import interpolate as ir


# --- Physical constants and potential parameters ---------------------------
G = 0.004300913551233961  # Gravitational constant

# Halo
Mh = 8e11  # Mass of halo
ah = 16e3  # Halo scale parameter
ch = 15.3  # Halo parameter entering the normalisation below
c = 1 / (np.log(1. + ch) - ch / (1. + ch))  # Simplification for formula

# Bulge
Mb = 5 * 10**9  # Mass of bulge
ab = 500  # Bulge parameter

# Disk
Md = 6.8 * 10**10  # Mass of disk
ad = 3000  # Disk parameter a
bd = 280  # Disk parameter b

# Progenitor
Mp = 2e4  # Progenitor mass
ap = 15.  # Progenitor parameter (softening term in the progenitor force)

# --- Particle state --------------------------------------------------------
xs1 = np.zeros((5000, 3))  # Stellar coordinates, one row per star
vs1 = np.zeros((5000, 3))  # Stellar velocities, one row per star
v2 = np.zeros(3)  # Stellar velocity at half-step

# --- Progenitor orbit table ------------------------------------------------
# Column layout used below: 0-2 position, 3-5 velocity, 6 time stamp,
# last two columns the Lagrange-point quantities.
pd = np.load('test.npy')  # Loading progenitor data
tp = pd[:, 6]  # Time steps of the progenitor
x, y, z = (pd[:, k] for k in (0, 1, 2))  # Coordinates
vx, vy, vz = (pd[:, k] for k in (3, 4, 5))  # Velocities
f = pd[:, -2]  # Lagrange points (second-to-last column)
a = pd[:, -1]  # Lagrange points (last column)

# Interpolators over time, so the orbit can be sampled at arbitrary t
# inside the tabulated range.
f1, f2, f3 = (ir.interp1d(tp, column) for column in (x, y, z))
g1, g2, g3 = (ir.interp1d(tp, column) for column in (vx, vy, vz))
h1, h2 = (ir.interp1d(tp, column) for column in (f, a))

# Position/velocity of the star currently being advanced; the force helpers
# read ``xs`` as a module-level global.
xs = np.array([0, 0, 0])
vs = np.array([0, 0, 0])

# Scratch rows used as ``out=`` targets by the force functions.
h = np.zeros((10, 3))
# Norm of the current star position
def md():
    """Return the Euclidean norm of the module-level position vector ``xs``."""
    return la.norm(xs)

# Distance between the current star and the progenitor
def nd():
    """Return the norm of ``xs`` minus the progenitor position at time ``t``.

    Both ``xs`` and ``t`` are read from module scope; the progenitor position
    comes from the interpolators ``f1``, ``f2`` and ``f3``.
    """
    progenitor = [f1(t), f2(t), f3(t)]
    separation = xs - progenitor
    return la.norm(separation)

# Norm of the progenitor position
def nm():
    """Return the norm of the interpolated progenitor position at time ``t``."""
    return la.norm([component(t) for component in (f1, f2, f3)])

# Progenitor force
def pfc():
    """Return the acceleration of the current star due to the progenitor.

    Evaluates ``-G*Mp * d / (|d|**2 + ap**2)**1.5`` where ``d`` is the offset
    of the module-level star position ``xs`` from the progenitor position at
    the module-level time ``t``.

    The progenitor position is interpolated once and reused for both the
    distance and the direction; interpolator calls are expensive inside the
    integration loop.

    Returns:
        The scratch row ``h[1]``, filled in place. It is overwritten by the
        next call, so copy it if the value must be kept.
    """
    offset = xs - [f1(t), f2(t), f3(t)]
    separation = la.norm(offset)
    strength = -G * Mp * (separation**2 + ap**2) ** (-1.5)
    return np.multiply(strength, offset, out=h[1])

# Halo force
def hfc():
    """Return the acceleration of the current star due to the halo.

    Evaluates
    ``-G*Mh*c * (log(1 + r/ah)/r**3 - 1/(r**2 * (ah + r))) * xs``
    with ``r = md()``, which has the form of an NFW-profile acceleration.

    The radius is computed once and reused; each ``md()`` call is a full
    norm evaluation, so repeating it four times per force call is wasteful
    in the hot loop.

    Returns:
        The scratch row ``h[2]``, filled in place. It is overwritten by the
        next call, so copy it if the value must be kept.
    """
    r = md()
    radial = np.log(1 + r / ah) / r**3 - 1 / (r**2 * (ah + r))
    return np.multiply(-G * Mh * c * radial, xs, out=h[2])

# Bulge force
def bfc():
    """Return the acceleration of the current star due to the bulge.

    Evaluates ``-G*Mb / (r * (ab + r)**2) * xs`` with ``r = md()``, which has
    the form of a Hernquist-profile acceleration.

    The radius is computed once and reused instead of calling ``md()`` for
    every occurrence in the formula.

    Returns:
        The scratch row ``h[4]``, filled in place. It is overwritten by the
        next call, so copy it if the value must be kept.
    """
    r = md()
    return np.multiply(-G * Mb / (r * (ab + r) ** 2), xs, out=h[4])

# Disk force
def dfc():
    """Return the acceleration of the current star due to the disk.

    With ``s = sqrt(bd**2 + z**2)`` the result is
    ``-G*Md / (x**2 + y**2 + (ad + s)**2)**1.5 * (x, y, z*(1 + ad/s))``,
    which has the form of a Miyamoto-Nagai disk acceleration. ``x, y, z``
    are the components of the module-level position ``xs``.

    Returns:
        The scratch row ``h[5]``, filled in place (``h[0]`` is used for the
        intermediate scaled position). Both rows are overwritten by the next
        call.
    """
    vertical = (bd**2 + xs[2]**2) ** 0.5
    stretched = np.multiply(xs, [1., 1., (1 + ad / vertical)], out=h[0])
    denominator = (xs[0]**2 + xs[1]**2 + (ad + vertical) ** 2) ** 1.5
    return np.multiply(-G * Md / denominator, stretched, out=h[5])

# Dynamic timestep
def ddt():
    """Return the smallest dynamical time step over the first ``w`` stars.

    For every active star (rows ``0..w-1`` of ``xs1``) two time scales are
    formed from distance / acceleration:

    * progenitor: ``sqrt(nd() / |pfc()|) / 10``
    * galaxy:     ``sqrt(md() / |dfc() + hfc() + bfc()|)``

    The smaller of the two, divided by 10, is that star's step; the minimum
    over all stars is returned.

    The force helpers read the star position from the module-level ``xs``,
    so it is rebound for each star and restored afterwards. The previous
    version assigned a local ``xs`` the helpers never saw, appended to an
    unbound local named like the function, and returned nothing.

    Returns:
        The minimum step over the active stars.

    Raises:
        ValueError: if ``w`` is 0, since there are no stars to take a
            minimum over.
    """
    global xs
    saved_xs = xs
    steps = []
    try:
        for j in range(w):
            xs = xs1[j]
            # NOTE(review): the progenitor term ends up divided by 10 twice
            # (here and below) while the galaxy term is divided once; this
            # mirrors the original expression -- confirm it is intended.
            progenitor_step = (nd() / la.norm(pfc())) ** 0.5 / 10.
            galaxy_step = (md() / la.norm(dfc() + hfc() + bfc())) ** 0.5
            steps.append(min(progenitor_step, galaxy_step) / 10.)
    finally:
        # Leave the integrator's current star untouched.
        xs = saved_xs
    return min(steps)

# --- Leapfrog (kick-drift-kick) integration of the released stars -----------
# ``dt`` was previously never defined, so the first kick raised NameError.
# The loop advances ``t`` by a fixed stride, so the integration step is bound
# to that same stride.
# NOTE(review): if a dynamic step from ddt() is wanted instead, the loop over
# ``t`` has to become a ``while`` loop that advances ``t`` by that step.
t_end, dt = 10000, 5

# Seed the first star from the progenitor state at t = 0.5.
t, w = .5, 1
xs1[0] = np.array([f1(t), f2(t), f3(t)]) * h2(t)
vs1[0] = np.random.normal([g1(t), g2(t), g3(t)], 1)

for t in range(0, t_end, dt):
    # Advance every active star; the force helpers read the module-level
    # ``xs`` and ``t``, so ``xs`` must be rebound before each evaluation.
    for i in range(w):
        xs = xs1[i]
        vs = vs1[i]
        v2 = vs + (dfc() + bfc() + hfc() + pfc()) * dt / 2  # half kick
        xs = xs + v2 * dt  # drift
        vs = v2 + (dfc() + bfc() + hfc() + pfc()) * dt / 2  # half kick at new xs
        xs1[i] = xs
        vs1[i] = vs

    # Release two more stars once t has passed 10*w + 5: one scaled by h2(t)
    # and one by h1(t). Each gets an independent random velocity drawn around
    # the progenitor velocity.
    if t > 10 * w + 5:
        progenitor_pos = np.array([f1(t), f2(t), f3(t)])
        progenitor_vel = [g1(t), g2(t), g3(t)]
        xs1[w] = progenitor_pos * h2(t)
        vs1[w] = np.random.normal(progenitor_vel, 1)
        xs1[w + 1] = progenitor_pos * h1(t)
        vs1[w + 1] = np.random.normal(progenitor_vel, 1)
        w += 2

正如您所看到的，每次循环都要进行大量运算。我曾尝试编写另一个程序，分别计算每颗恒星的轨道，因为这些恒星被视为彼此之间没有相互作用。但这样就出现了问题：由于我使用的是动态时间步长，每颗恒星恰好被模拟到 t = 10000 的概率非常低，因此它们合成的图像并不准确。

我搜索过解决方案，numba 似乎正好适合这个问题，因为（据我了解）它使用 C 来完成所有计算，但我找不到足够的信息来说明如何将它用于我的具体问题。

我本想改用C语言来写，但我已经没有时间从头学习它了，因为我的项目必须继续推进。我之所以在这里发帖，是因为我已经尽了最大努力，确实需要一些指引。

我的程序非常慢，而且我确信，一旦加入随时间变化的势，它很可能就完全跑不动了。因此，如果有任何合理的方法可以加速我的代码，我很乐意听取，并将不胜感激。

编辑：我应该补充一点：我觉得另一个程序（即对每颗恒星分别建模的那个）还有一个额外的好处，就是不需要创建一个巨大的 numpy 数组来存储坐标。不幸的是，时间步长的问题仍然存在。

用numba加速python代码的方法？包含多个函数和两个嵌套循环

0 个答案: