我尝试用 numba 来加速这段代码,但它运行起来仍然很慢。有人告诉我,用 C++ 实现的话大约 5 分钟就能跑完。因此我想知道:有没有办法进一步加快这段代码的速度?
我已经试过 Numba 和 Cython。Cython 的版本似乎更慢,不过这可能是因为我对 Cython 不够熟悉。
import numpy as np
from numba import jit
import time
# Wall-clock reference; the script prints the elapsed time at the very end.
start = time.time()

# Lattice size: sites are indexed 0 .. n-1, with n = (L + 1) ** 2.
L = 100
n = (L + 1) ** 2
# Number of independent runs; one row of Combinee is filled per run.
realization = 1
Delta = 3.0

# Output buffer: one row per realization, 2 * n values per row.
Combinee = np.zeros((realization, 2 * n))
# Per-site coordinate triples, allocated as zeros here.
rr = np.zeros((n, 3))
@jit(nopython=True, nogil=True)
def findr(rr):
    """Fill columns 0 and 1 of ``rr`` in place with lattice coordinates.

    Row ``s`` receives ``(s // (L + 1)) % (L + 1)`` in column 0 and
    ``s % (L + 1)`` in column 1, for ``s`` in ``range(n)``. Any other
    column of ``rr`` is left untouched.

    Reads the module-level globals ``L`` and ``n``. Returns nothing.
    """
    side = L + 1
    for s in range(n):
        # Integer floor division gives the same value as int(s / side) for
        # non-negative s, without a round trip through floating point.
        rr[s, 0] = (s // side) % side
        rr[s, 1] = s % side
# Fill the first two columns of the module-level rr array in place.
findr(rr)
@jit(nopython=True, nogil=True)
def checks():
    """Run one realization and return its ``2 * n`` final energy values.

    Reads the module-level globals ``L``, ``n``, ``Delta`` and ``rr``.

    Steps:
      1. Draw two sets of ``L**3 / 2`` random points in ``[0, L)^3``. At
         every site the first set contributes ``+1 / distance`` and the
         second ``-1 / distance`` to ``phiext``.
      2. Repeat until a complete pass changes nothing:
         a. single-site sweep: change each site's occupation (0, 1 or 2)
            according to the signs of ``VB`` and ``CB``;
         b. pair sweep: for every ordered pair ``(t, p)`` with ``t != p``,
            move one unit of occupation between the two sites when the
            corresponding energy difference is negative.
         ``phicol`` is updated incrementally after every accepted change.
      3. Build per-site energies ``Ei`` from the final occupations.

    Returns:
        1-D float array of length ``2 * n``; entry ``w`` is
        ``Ei[w] - Delta / 2`` and entry ``w + n`` is ``Ei[w] + Delta / 2``.

    NOTE(review): the nesting below was reconstructed from a paste that had
    lost its indentation -- confirm against the author's original.
    """
    ncharge = L * L * L // 2
    pchargepos = np.random.rand(ncharge, 3) * L
    nchargepos = np.random.rand(ncharge, 3) * L
    Alln = np.ones((n,), dtype=np.int8)

    # External potential at each site from the two random point sets.
    phiext = np.zeros(n)
    for pos1 in range(n):
        for pos2 in range(ncharge):
            phiext[pos1] += (
                1. / np.linalg.norm(rr[pos1] - pchargepos[pos2])
                - 1. / np.linalg.norm(rr[pos1] - nchargepos[pos2])
            )

    # Site-site contribution, kept up to date as occupations change.
    phicol = np.zeros(n)

    while True:
        check = 0      # set to 1 if the single-site sweep changed anything
        sscheck = 0    # set to 1 if the pair sweep changed anything

        # ---- single-site sweep ----
        for i in range(n):
            oldn = Alln[i]
            level = -phiext[i] - phicol[i]
            # Site level shifted down / up by Delta / 2 (names presumably
            # stand for valence / conduction band -- confirm).
            VB = level - Delta / 2
            CB = level + Delta / 2
            # Sequential tests on purpose: a site may go 0 -> 1 -> 2 within
            # one visit.
            if Alln[i] == 0 and VB < 0:
                Alln[i] = 1
            if Alln[i] == 1 and VB > 0:
                Alln[i] = 0
            if Alln[i] == 1 and CB < 0:
                Alln[i] = 2
            if Alln[i] == 2 and CB > 0:
                Alln[i] = 1
            if Alln[i] != oldn:
                check = 1
                changed = Alln[i] - oldn
                for f in range(n):
                    if f != i:
                        phicol[f] -= changed / np.linalg.norm(rr[i] - rr[f])
        print(Alln)

        # ---- pair sweep ----
        for t in range(n):
            for p in range(n):
                if p == t:
                    continue
                secondcheck = 0
                # Only these two entries of phiext + phicol are needed;
                # summing the full arrays here would cost O(n) per pair.
                Ep = phiext[p] + phicol[p]
                Et = phiext[t] + phicol[t]
                oldp = Alln[p]
                oldt = Alln[t]
                rtp = np.linalg.norm(rr[t] - rr[p])
                # Energy differences for moving one unit t -> p and p -> t.
                move_tp = Et - Ep + Delta * (1 - oldt + oldp) - 1 / rtp
                move_pt = -Et + Ep + Delta * (1 + oldt - oldp) - 1 / rtp

                if oldt > 0 and oldp < 2 and move_tp < 0:
                    secondcheck = 1
                if oldp > 0 and oldt < 2 and move_pt < 0:
                    secondcheck = -1
                # Both directions allowed and favourable: take the lower one.
                if oldp == 1 and oldt == 1 and move_tp < 0 and move_pt < 0:
                    if move_pt < move_tp:
                        secondcheck = -1
                    else:
                        secondcheck = 1

                if secondcheck == 1:
                    Alln[t] -= 1
                    Alln[p] += 1
                if secondcheck == -1:
                    Alln[t] += 1
                    Alln[p] -= 1

                if secondcheck != 0:
                    sscheck = 1
                    dt = Alln[t] - oldt
                    dp = Alln[p] - oldp
                    phicol[p] += -dt / rtp
                    phicol[t] += -dp / rtp
                    for y in range(n):
                        if y != p and y != t:
                            phicol[y] += (
                                -dp / np.linalg.norm(rr[p] - rr[y])
                                - dt / np.linalg.norm(rr[t] - rr[y])
                            )

        print(check, " ", sscheck, ": ", Alln)
        if check == 0 and sscheck == 0:
            break

    print("Last:")
    print(Alln)

    # ---- final energies from the converged occupations ----
    Efinial = np.zeros(2 * n)
    Ei = -phiext
    for w in range(n):
        for o in range(n):
            if o != w:
                row = np.linalg.norm(rr[o] - rr[w])
                Ei[w] += (-1 + Alln[o]) / row
        Efinial[w] = Ei[w] - Delta / 2
        Efinial[w + n] = Ei[w] + Delta / 2
    return Efinial
@jit(nopython=True, nogil=True)
def loop(Combinee1):
    """Fill one row of ``Combinee1`` per realization and return the array.

    For each index in ``range(realization)`` (a module-level global) the
    row is overwritten with the array returned by a fresh ``checks()`` call.
    The input array is modified in place and also returned.
    """
    for run in range(realization):
        Combinee1[run, :] = checks()
    return Combinee1
# Run every realization, then flatten the results into a plain Python list.
Combinee = loop(Combinee)
Combinee = Combinee.flatten()
Combinee = Combinee.tolist()

# Mode "w" discards any previous contents of the file; the with-block closes
# the handle even if the write raises.
with open("bothcheckdata.txt", "w", encoding="utf-8") as f:
    f.write(str(Combinee))

# Report total wall-clock time since `start` was taken.
end = time.time()
print(end - start)
答案 0 :(得分:0)
您提供的代码很长,而且比较乱。不过,最大的问题在于 numpy 的用法不对:NumPy 的意义在于向量化运算,而不是逐元素循环。所以,对于“如何进一步加快我的代码?”这个问题,答案是:
——正确使用 numpy。
让我们看看您的代码中的一小部分:
# original code with nested loops with +1 to avoid zero division
def f_1(rr, pchargepos, nchargepos, phiext):
    """Accumulate the loop-based potential into ``phiext`` and return it.

    For every site ``rr[i]`` and every index ``j`` in
    ``range(len(nchargepos))`` this adds
    ``1 / (|rr[i] - pchargepos[j]| + 1) - 1 / (|rr[i] - nchargepos[j]| + 1)``
    to ``phiext[i]``. The ``+ 1`` keeps the denominator non-zero when a site
    coincides with a charge.

    Args:
        rr: sequence of site positions (indexable, has ``len``).
        pchargepos: positions contributing with a plus sign; indexed with
            the same ``j`` as ``nchargepos``, so it must be at least as long.
        nchargepos: positions contributing with a minus sign.
        phiext: float array with one entry per site; modified in place.

    Returns:
        The same ``phiext`` object that was passed in.
    """
    # Take the number of sites from the argument itself, not from a global.
    for pos1 in range(len(rr)):
        site = rr[pos1]
        for pos2 in range(len(nchargepos)):
            phiext[pos1] += (
                1 / (np.linalg.norm(site - pchargepos[pos2]) + 1) -
                1 / (np.linalg.norm(site - nchargepos[pos2]) + 1))
    return phiext
# modified code with numpy methods instead of loops
def f_2(rr, pchargepos, nchargepos, phiext):
    """Vectorised potential for 1-D positions; adds into ``phiext``.

    Builds the full sites-by-charges table of
    ``1 / (|rr[i] - pchargepos[j]| + 1) - 1 / (|rr[i] - nchargepos[j]| + 1)``
    by broadcasting, sums it over ``j`` and adds the result to ``phiext``.

    Distances are taken with ``abs``, so positions are expected to be
    scalars (1-D input arrays). ``pchargepos`` and ``nchargepos`` must have
    broadcast-compatible lengths.

    Args:
        rr: 1-D sequence of site positions.
        pchargepos: 1-D sequence of positions contributing with a plus sign.
        nchargepos: 1-D sequence of positions contributing with a minus sign.
        phiext: float array with one entry per site; modified in place.

    Returns:
        The same ``phiext`` object that was passed in.
    """
    # Column of sites against rows of charges: broadcasting produces the
    # (sites, charges) table without materialising tiled copies.
    sites = np.asarray(rr)[:, np.newaxis]
    plus = np.asarray(pchargepos)[np.newaxis, :]
    minus = np.asarray(nchargepos)[np.newaxis, :]
    d = 1 / (np.abs(sites - plus) + 1)
    e = 1 / (np.abs(sites - minus) + 1)
    phiext += (d - e).sum(axis=1)
    return phiext
if __name__ == '__main__':
    # n sites and m charges of each sign, all integer positions in [-10, 10).
    n, m = 100, 200
    rr = np.random.randint(-10, 10, size=n)
    pchargepos = np.random.randint(-10, 10, size=m)
    nchargepos = np.random.randint(-10, 10, size=m)

    # Both functions add into the buffer they are given, so each one gets
    # its own zeroed buffer.
    phiext = np.zeros(n)
    ans_1 = f_1(rr, pchargepos, nchargepos, phiext)

    phiext = np.zeros(n)
    ans_2 = f_2(rr, pchargepos, nchargepos, phiext)

    # check results
    print(sum(ans_1), sum(ans_2))
我的机器上的运行时间:
%timeit f_1(rr, pchargepos, nchargepos, phiext)
211 ms ± 967 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit f_2(rr, pchargepos, nchargepos, phiext)
205 µs ± 214 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
因此,在这个例子中,向量化的 numpy 实现比普通 Python 的嵌套循环快约 1000 倍(211 ms 对 205 µs)。