以下函数是一个优化问题的一部分。它很简单,但会被频繁调用,因此如果能提高速度就好了。我尝试了 Numba,但看来我还是得用 FORTRAN 来写:
import numpy as np
from numba import autojit
# I am using numbapro: from numbapro import autojit
# Minimal dataset used to exercise and time get_denom.
n_comp_glob = 2
# Packed parameter vector: three consecutive runs of n_comp_glob values,
# which get_denom slices apart as k, sigma and z.
x_glob = np.random.rand(3 * n_comp_glob)
qs_glob = np.array([100.])
# One weight of 1.0 per component.
cp_glob = np.tile(1., n_comp_glob)
cs_glob = np.array([100.])
def get_denom(n_comp, qs, x, cp, cs_f):
    """Return the denominator in Equ 14a - 14c (Brooks & Cramer 1992).

    Explicit-loop formulation: the sum is accumulated one component at a
    time rather than with array expressions.

    Args:
        n_comp: Number of components.
        qs: Numerator of the ratio ``qs / cs_f``.
        x: Packed parameters; ``x[0:n_comp]`` is ``k``,
            ``x[n_comp:2*n_comp]`` is ``sigma`` and
            ``x[2*n_comp:3*n_comp]`` is ``z``.
        cp: Per-component factors, indexed ``0 .. n_comp - 1``.
        cs_f: Divisor of ``qs``; it is also added to the accumulated sum.

    Returns:
        ``sum_i (sigma[i] + z[i]) * k[i] * (qs / cs_f) ** (z[i] - 1) * cp[i]
        + cs_f``.
    """
    k = x[0:n_comp]
    sigma = x[n_comp:2 * n_comp]
    z = x[2 * n_comp:3 * n_comp]

    # The ratio does not depend on the component, so compute it once.
    ratio = qs / cs_f

    a = 0.0
    for i in range(n_comp):
        a += (sigma[i] + z[i]) * (k[i] * ratio ** (z[i] - 1)) * cp[i]

    denom = a + cs_f
    return denom
# JIT-wrapped variant of get_denom, used for the timing comparison.
get_denom_jit=autojit(get_denom)
import timeit
%timeit get_denom(n_comp_glob,qs_glob,x_glob,cp_glob,cs_glob)
10000 loops, best of 3: 22.9 µs per loop
%timeit get_denom_jit(n_comp_glob,qs_glob,x_glob,cp_glob,cs_glob)
10000 loops, best of 3: 27.9 µs per loop
如果我增加组件数量(n_comp),numba仍然不比python快。为什么?
编辑:
我尝试了从 numba 文档中摘取的代码:
from numba import *
import numpy as np
# Scale factor multiplied into the stencil sum by the diffuse_* functions.
mu = 0.1
# Grid extent along the first and second axis.
Lx = 101
Ly = 101
# Not referenced by the functions shown in this snippet.
N = 1000
def diffuse_loops(iter_num):
    """Run ``iter_num`` sweeps of a five-point stencil using explicit loops.

    Reads the module-level grid size ``Lx``, ``Ly`` and scale factor ``mu``.
    The grid starts at zero except for a value of 1000.0 at the centre cell.

    Args:
        iter_num: Number of sweeps over the interior of the grid.

    Returns:
        The ``(Lx, Ly)`` float64 array bound to ``u`` after the last swap.
    """
    u = np.zeros((Lx, Ly), dtype=np.float64)
    temp_u = np.zeros_like(u)
    # Floor division keeps the indices integral; ``Lx / 2`` is a float on
    # Python 3 and NumPy rejects float indices.
    temp_u[Lx // 2, Ly // 2] = 1000.0

    for n in range(iter_num):
        # Update interior cells only; the border rows/columns are never written.
        for i in range(1, Lx - 1):
            for j in range(1, Ly - 1):
                u[i, j] = mu * (temp_u[i + 1, j] + temp_u[i - 1, j] +
                                temp_u[i, j + 1] + temp_u[i, j - 1] -
                                4 * temp_u[i, j])

        # Swap buffers so the next sweep reads what was just written.
        # NOTE(review): because of this swap, the returned ``u`` is the buffer
        # that was read during the last sweep, not the one just written --
        # confirm this is intended.
        u, temp_u = temp_u, u

    return u
def diffuse_array_expressions(iter_num):
    """Run ``iter_num`` sweeps of a five-point stencil using array slices.

    Reads the module-level grid size ``Lx``, ``Ly`` and scale factor ``mu``.
    The grid starts at zero except for a value of 1000.0 at the centre cell.

    Args:
        iter_num: Number of sweeps over the interior of the grid.

    Returns:
        The ``(Lx, Ly)`` float64 array bound to ``u`` after the last swap.
    """
    u = np.zeros((Lx, Ly), dtype=np.float64)
    temp_u = np.zeros_like(u)
    # Floor division keeps the indices integral; ``Lx / 2`` is a float on
    # Python 3 and NumPy rejects float indices.
    temp_u[Lx // 2, Ly // 2] = 1000.0

    for i in range(iter_num):
        # Shifted slices address the four neighbours of every interior cell.
        u[1:-1, 1:-1] = mu * (temp_u[2:, 1:-1] + temp_u[:-2, 1:-1] +
                              temp_u[1:-1, 2:] + temp_u[1:-1, :-2] -
                              4 * temp_u[1:-1, 1:-1])

        # Swap buffers so the next sweep reads what was just written.
        # NOTE(review): because of this swap, the returned ``u`` is the buffer
        # that was read during the last sweep, not the one just written --
        # confirm this is intended.
        u, temp_u = temp_u, u

    return u
# JIT-wrapped counterparts of the two diffusion functions, so each can be
# timed against its plain-Python version.
diffuse_array_expressions_jit = autojit(diffuse_array_expressions)
diffuse_loops_jit = autojit(diffuse_loops)
调用函数:
%timeit diffuse_array_expressions(100)
100 loops, best of 3: 13.9 ms per loop
%timeit diffuse_array_expressions_jit(100)
100 loops, best of 3: 14.8 ms per loop
%timeit diffuse_loops(100)
1 loops, best of 3: 1.88 s per loop
%timeit diffuse_loops_jit(100)
1000 loops, best of 3: 1.87 ms per loop
看来,当代码在Python中正确实现时,numba失去了它的力量。上面的函数当然可以在没有循环的情况下实现:
def get_denom_vec(n_comp, qs, x, cp, cs_f):
    """Return the denominator in Equ 14a - 14c (Brooks & Cramer 1992).

    Loop-free formulation built from array expressions.

    Args:
        n_comp: Number of components.
        qs: Numerator of the ratio ``qs / cs_f``.
        x: Packed parameter array; ``x[0:n_comp]`` is ``k``,
            ``x[n_comp:2*n_comp]`` is ``sigma`` and
            ``x[2*n_comp:3*n_comp]`` is ``z``.
        cp: Per-component factors, multiplied element-wise with the terms.
        cs_f: Divisor of ``qs``; it is also added to the summed terms.

    Returns:
        ``np.sum((sigma + z) * k * (qs / cs_f) ** (z - 1) * cp) + cs_f``.
    """
    k = x[0:n_comp]
    sigma = x[n_comp:2 * n_comp]
    z = x[2 * n_comp:3 * n_comp]

    a = (sigma + z) * (k * (qs / cs_f) ** (z - 1)) * cp
    denom = np.sum(a) + cs_f
    return denom
# JIT-wrapped variant of the loop-free implementation.
get_denom_vec_jit = autojit(get_denom_vec)
%timeit get_denom_vec_jit(n_comp_glob,qs_glob,x_glob,cp_glob,cs_glob)
1000 loops, best of 3: 223 µs per loop
%timeit get_denom_vec_jit(n_comp_glob,qs_glob,x_glob,cp_glob,cs_glob)
1000 loops, best of 3: 245 µs per loop
答案 0 :(得分:4)
numba很酷的一点是,你可以自己找出原因!
使用
@autojit(nopython=True)
或
numba --annotate myfile.py
并迭代修复无法嵌入的代码。