Question

我试图使用 Numba 加快这段代码的速度，但它运行起来仍然很慢。有人告诉我，用 C++ 大约 5 分钟就能跑完这段代码。因此我想知道，是否还有办法进一步加快这段代码的速度？

我尝试了Numba和Cython。 Cython的速度似乎较慢，但这可能是由于我对Cython的无知。

import numpy as np
from numba import jit
import time


# Wall-clock reference; the script reports the elapsed time at the very end.
start = time.time()

# The site count is (L + 1) squared.
L = 100
n = int((L + 1) ** 2)
realization = 1

Delta = 3.0

# One row of 2*n values per realization.
Combinee = np.zeros((realization, 2 * n))
# Per-site coordinate table with three columns, initially all zero.
rr = np.zeros((n, 3))

@jit(nopython=True,nogil=True)
def findr(rr):
    """Write the 2-D lattice coordinates of every site into ``rr`` in place.

    Site index ``k`` (``0 <= k < n``) is mapped to column 0 =
    ``int(k / (L + 1)) % (L + 1)`` and column 1 = ``k % (L + 1)``.
    Column 2 is never written here, so it keeps whatever the caller put
    there.

    Reads the module-level globals ``L`` and ``n``.  Returns nothing.
    """
    side = L + 1
    for site in range(n):
        rr[site, 0] = int(site / side) % side
        rr[site, 1] = site % side


# Fill the module-level coordinate table once, before any realization runs.
findr(rr) 

@jit(nopython=True,nogil=True)
def checks():
    """Run one random realization to convergence and return its energy levels.

    Reads the module-level globals ``L``, ``n``, ``Delta`` and ``rr``.

    Steps:
      1. Draw ``int(L*L*L/2)`` positive and the same number of negative
         charge positions uniformly in ``[0, L)`` along three axes and
         accumulate ``1/r`` (positive) minus ``1/r`` (negative) for every
         site of ``rr`` into ``phiext``.
      2. Start with state 1 on every site (``Alln``) and repeat two sweeps
         until neither of them changes anything:
           * single-site sweep: each site may step between the states
             0, 1 and 2 depending on the signs of ``VB`` and ``CB``;
           * pair sweep: for every ordered pair ``(t, p)`` one unit may be
             moved between the two sites when the corresponding energy
             expression is negative.
         ``phicol`` is updated incrementally after every accepted change.
      3. Recompute the per-site energy ``Ei`` from the final states.

    Returns:
        1-D float array of length ``2*n``.  Entry ``w`` holds
        ``Ei[w] - Delta/2`` and entry ``w + n`` holds ``Ei[w] + Delta/2``.

    NOTE(review): the physical meaning of the states (presumably an
    occupation number per site) is inferred from the names only.
    """
    ncharges = int(L*L*L/2)
    pchargepos = np.random.rand(ncharges,3)*L
    nchargepos = np.random.rand(ncharges,3)*L
    Alln = np.ones((n,),dtype=np.int8)
    phiext = np.zeros(n)

    # Potential of the fixed random charges on every site.
    for pos1 in range(n):
        for pos2 in range(ncharges):
            phiext[pos1] += 1./np.linalg.norm(rr[pos1]-pchargepos[pos2]) - 1./np.linalg.norm(rr[pos1]-nchargepos[pos2])

    # Potential created by the sites themselves; kept up to date incrementally.
    phicol = np.zeros(n)

    while True:
        check = 0      # set to 1 when the single-site sweep changes any site
        sscheck = 0    # set to 1 when the pair sweep accepts any move

        # ---- single-site sweep ------------------------------------------
        for i in range(n):
            oldn = Alln[i]
            VB = -phiext[i] - phicol[i] - Delta/2
            CB = -phiext[i] - phicol[i] + Delta/2
            # These are deliberately independent ``if`` statements, not an
            # ``elif`` chain: a site may pass through several of them in
            # one visit (e.g. 0 -> 1 -> 2).
            if Alln[i] == 0 and VB < 0:
                Alln[i] = 1
            if Alln[i] == 1 and VB > 0:
                Alln[i] = 0
            if Alln[i] == 1 and CB < 0:
                Alln[i] = 2
            if Alln[i] == 2 and CB > 0:
                Alln[i] = 1
            if Alln[i] != oldn:
                check = 1
                # Propagate the state change to every other site.
                for f in range(n):
                    if f != i:
                        phicol[f] -= (Alln[i] - oldn)/float(np.linalg.norm(rr[i]-rr[f]))

        print(Alln)

        # ---- pair sweep -------------------------------------------------
        for t in range(n):
            for p in range(n):
                if p != t:
                    secondcheck = 0
                    # Only two entries of phiext + phicol are needed.  The
                    # original summed the two full arrays for every (t, p)
                    # pair, which added O(n) work and an allocation inside
                    # this O(n^2) loop.
                    Ep = phiext[p] + phicol[p]
                    Et = phiext[t] + phicol[t]
                    oldp = Alln[p]
                    oldt = Alln[t]
                    rtp = float(np.linalg.norm(rr[t]-rr[p]))

                    # Energy expressions for moving one unit t -> p and
                    # p -> t respectively; a negative value allows the move.
                    cost_tp = Et - Ep + Delta*(1 - oldt + oldp) - 1/rtp
                    cost_pt = -Et + Ep + Delta*(1 + oldt - oldp) - 1/rtp

                    if oldt > 0 and oldp < 2:
                        if cost_tp < 0:
                            secondcheck = 1

                    if oldp > 0 and oldt < 2:
                        if cost_pt < 0:
                            secondcheck = -1

                    # Both directions are allowed: take the lower expression.
                    if oldp == 1 and oldt == 1 and cost_tp < 0 and cost_pt < 0:
                        if cost_pt < cost_tp:
                            secondcheck = -1
                        else:
                            secondcheck = 1

                    if secondcheck == 1:
                        Alln[t] -= 1
                        Alln[p] += 1
                    if secondcheck == -1:
                        Alln[t] += 1
                        Alln[p] -= 1

                    if secondcheck != 0:
                        sscheck = 1
                        phicol[p] += -(Alln[t] - oldt)/rtp
                        phicol[t] += -(Alln[p] - oldp)/rtp
                        for y in range(n):
                            if y != p and y != t:
                                phicol[y] += -(Alln[p] - oldp)/float(np.linalg.norm(rr[p]-rr[y])) - (Alln[t]-oldt)/float(np.linalg.norm(rr[t]-rr[y]))

        print(check, " ", sscheck, ": ", Alln)

        # Converged: neither sweep changed anything in this pass.
        if check == 0 and sscheck == 0:
            break

    print("Last:")
    print(Alln)

    Efinial = np.zeros(2*n)
    Ei = -phiext
    for w in range(n):
        for o in range(n):
            if o != w:
                row = np.linalg.norm(rr[o]-rr[w])
                Ei[w] += (-1 + Alln[o])/float(row)
        # Written once per site, after Ei[w] is complete.  The original
        # rewrote both entries on every inner iteration and never wrote
        # them at all when there was no other site (n == 1).
        Efinial[w] = Ei[w] - Delta/2
        Efinial[w + n] = Ei[w] + Delta/2

    return Efinial

@jit(nopython=True,nogil=True)
def loop(Combinee1):
    """Store the result of one ``checks()`` call in each row of ``Combinee1``.

    Rows ``0 .. realization - 1`` are overwritten in place (``realization``
    is a module-level global).  The same array is returned.
    """
    run = 0
    while run < realization:
        Combinee1[run] = checks()
        run += 1
    return Combinee1

# Run every realization, then flatten the result into a plain Python list.
Combinee=loop(Combinee)
Combinee=Combinee.flatten()
Combinee=Combinee.tolist()

# Mode "w" truncates any previous file, so a separate truncate-then-append
# step is unnecessary.  The context manager closes the handle even if the
# write raises.
with open("bothcheckdata.txt", "w") as f:
    f.write(str(Combinee))

# Total wall-clock time since ``start`` was taken at the top of the script.
end=time.time()
print(end-start)

Answer 1

您提供的代码很长，也比较乱。不过最大的问题是 numpy 的用法不对：numpy 的核心在于向量化运算，而不是逐元素循环。所以对于“how can I further speed up my code?”这个问题，答案是：正确使用 numpy。

让我们看看您的代码中的一小部分：

# original code with nested loops with +1 to avoid zero division
def f_1(rr, pchargepos, nchargepos, phiext):
    """Accumulate the charge potential on every site with explicit loops.

    This is the deliberately slow baseline used for the timing comparison.

    For every site ``rr[i]`` and every charge index ``j`` the value
    ``1 / (|rr[i] - pchargepos[j]| + 1) - 1 / (|rr[i] - nchargepos[j]| + 1)``
    is added to ``phiext[i]``.  The ``+ 1`` keeps the denominator non-zero
    when a site coincides with a charge.

    Args:
        rr: site coordinates, one entry per site.
        pchargepos: positive charge coordinates.
        nchargepos: negative charge coordinates; its length sets the number
            of charge pairs, so ``pchargepos`` must be at least as long.
        phiext: accumulator with one entry per site; modified in place.

    Returns:
        ``phiext`` (the same object that was passed in).
    """
    # The site count comes from ``rr`` itself rather than from a global
    # ``n``, so the function works outside the demo script as well.
    for pos1, site in enumerate(rr):
        # Index loop kept on purpose: both charge arrays share ``pos2``.
        for pos2 in range(len(nchargepos)):
            phiext[pos1] += (
                1 / (np.linalg.norm(site - pchargepos[pos2]) + 1) -
                1 / (np.linalg.norm(site - nchargepos[pos2]) + 1))

    return phiext

# modified code with numpy methods instead of loops
def f_2(rr, pchargepos, nchargepos, phiext):
    """Vectorised equivalent of ``f_1`` for one-dimensional coordinates.

    Adds, for every site ``rr[i]``, the sum over charges ``j`` of
    ``1 / (|rr[i] - pchargepos[j]| + 1) - 1 / (|rr[i] - nchargepos[j]| + 1)``
    to ``phiext[i]``.

    The distance is a plain absolute difference, so the inputs must be 1-D
    arrays of scalar coordinates.  ``pchargepos`` and ``nchargepos`` must
    have the same length.

    Args:
        rr: 1-D site coordinates.
        pchargepos: 1-D positive charge coordinates.
        nchargepos: 1-D negative charge coordinates.
        phiext: float accumulator with one entry per site; modified in place.

    Returns:
        ``phiext`` (the same object that was passed in).
    """
    # Broadcasting a column of sites against a row of charges yields the
    # (sites, charges) distance table directly, without materialising the
    # three tiled copies the original built with np.tile.
    sites = np.asarray(rr)[:, np.newaxis]
    pos = np.asarray(pchargepos)[np.newaxis, :]
    neg = np.asarray(nchargepos)[np.newaxis, :]

    d = 1 / (np.abs(sites - pos) + 1)
    e = 1 / (np.abs(sites - neg) + 1)

    phiext += (d - e).sum(axis=1)

    return phiext

if __name__ == '__main__':

    # Demo sizes: n sites and m charges of each sign, all with random
    # integer coordinates drawn from [-10, 10).
    n = 100
    m = 200
    rr = np.random.randint(-10, 10, size=n)
    pchargepos = np.random.randint(-10, 10, size=m)
    nchargepos = np.random.randint(-10, 10, size=m)

    # Each implementation gets its own fresh accumulator because both
    # modify the array they are given.
    phiext = np.zeros(n)
    ans_1 = f_1(rr, pchargepos, nchargepos, phiext)

    phiext = np.zeros(n)
    ans_2 = f_2(rr, pchargepos, nchargepos, phiext)

    # check results
    total_1 = sum(ans_1)
    total_2 = sum(ans_2)
    print(total_1, total_2)

我的机器上的运行时间：

%timeit f_1(rr, pchargepos, nchargepos, phiext)
211 ms ± 967 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)

%timeit f_2(rr, pchargepos, nchargepos, phiext)
205 µs ± 214 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)

所以在这个例子中，numpy 向量化版本比普通 Python 的嵌套循环快约 1000 倍。

如何在Numba中进一步加速代码？

1 个答案: