为什么numba没有提高速度?

时间:2015-02-05 00:54:25

标签: python numpy numba

以下函数是一个优化问题的一部分。它很简单,但会被频繁调用,因此如果能提高速度就再好不过了。我尝试了 Numba,但看来我还是得用 FORTRAN 来写它:

import numpy as np
from numba import autojit
# I am using numbapro: from numbapro import autojit

# Minimal dataset: module-level inputs used by the get_denom timing calls.
n_comp_glob = 2
# get_denom slices x into three consecutive segments of n_comp entries each,
# so it needs 3 * n_comp_glob values. (The original referenced an undefined
# name `n_comp` here and in cp_glob, which raises NameError.)
x_glob = np.random.rand(3 * n_comp_glob)
qs_glob = np.array([100.0])
cp_glob = np.tile(1.0, n_comp_glob)
cs_glob = np.array([100.0])

def get_denom(n_comp, qs, x, cp, cs_f):
    """Return the denominator in Equ 14a - 14c (Brooks & Cramer 1992).

    Explicit-loop implementation: accumulates
    ``(sigma[i] + z[i]) * k[i] * (qs / cs_f) ** (z[i] - 1) * cp[i]``
    over the first ``n_comp`` entries and adds ``cs_f`` to the total.

    Parameters
    ----------
    n_comp : int
        Number of terms; also the length of each segment read from ``x``.
    qs, cs_f : scalar or array
        Combined only through ``qs / cs_f``; ``cs_f`` is also added to the sum.
    x : sliceable sequence
        Read as three consecutive segments of length ``n_comp``:
        ``k``, ``sigma`` and ``z`` (in that order).
    cp : indexable
        Per-term factor; entries ``0 .. n_comp - 1`` are read.

    Returns
    -------
    The accumulated sum plus ``cs_f`` (same scalar/array kind as ``qs / cs_f``).
    """
    k = x[0:n_comp]
    sigma = x[n_comp:2 * n_comp]
    z = x[2 * n_comp:3 * n_comp]

    # The ratio does not depend on i, so compute it once outside the loop.
    # (The original read an undefined name `cs`; the parameter is `cs_f`.)
    ratio = qs / cs_f

    a = 0.0
    for i in range(n_comp):
        a += (sigma[i] + z[i]) * (k[i] * ratio ** (z[i] - 1)) * cp[i]

    # The original returned an unassigned `denom`; the denominator is the
    # accumulated sum plus cs_f.
    return a + cs_f

# Wrap get_denom with autojit so the wrapped variant can be timed against the
# plain-Python function.
get_denom_jit=autojit(get_denom)

import timeit
%timeit get_denom(n_comp_glob,qs_glob,x_glob,cp,cs_glob)
10000 loops, best of 3: 22.9 µs per loop
%timeit get_denom_jit(n_comp_glob,qs_glob,x_glob,cp_glob,cs_glob)
10000 loops, best of 3: 27.9 µs per loop

如果我增加组件数量(n_comp),numba仍然不比python快。为什么?

编辑:

我尝试了从 numba 文档(numba documentation)中摘取的代码:
from numba import *
import numpy as np

# Factor applied to the neighbour-difference stencil in the diffusion updates.
mu = 0.1
# Grid extent along the first and second array axes.
Lx = 101
Ly = 101
# Not referenced by the snippets visible in this post.
N = 1000


def diffuse_loops(iter_num):
    """Run ``iter_num`` stencil sweeps over an (Lx, Ly) grid using explicit loops.

    Two float64 buffers are allocated. ``temp_u`` starts with a single value of
    1000.0 at the grid centre. Each sweep writes, for every interior cell,
    ``mu * (sum of the four neighbours - 4 * centre)`` computed from ``temp_u``
    into ``u``, then the two buffers are exchanged.

    Parameters
    ----------
    iter_num : int
        Number of sweeps to perform.

    Returns
    -------
    numpy.ndarray
        The buffer bound to ``u`` after the final exchange. Because the
        exchange happens after each sweep, this is the buffer that was read
        from during the last sweep; with ``iter_num == 0`` it is all zeros.

    Notes
    -----
    Reads the module-level names ``mu``, ``Lx`` and ``Ly``.
    """
    u = np.zeros((Lx, Ly), dtype=np.float64)
    temp_u = np.zeros_like(u)
    # Floor division keeps the index an integer on Python 3 as well.
    temp_u[Lx // 2, Ly // 2] = 1000.0

    for _ in range(iter_num):
        for i in range(1, Lx - 1):
            for j in range(1, Ly - 1):
                u[i, j] = mu * (temp_u[i + 1, j] + temp_u[i - 1, j] +
                                temp_u[i, j + 1] + temp_u[i, j - 1] -
                                4 * temp_u[i, j])

        # Exchange the buffers once per sweep so the next sweep reads the
        # values just written (the swap must live inside the iteration loop).
        u, temp_u = temp_u, u

    return u


def diffuse_array_expressions(iter_num):
    """Run ``iter_num`` stencil sweeps over an (Lx, Ly) grid using array slices.

    Slice-based counterpart of the explicit-loop version: each sweep assigns
    ``mu * (sum of the four shifted neighbour slices - 4 * centre slice)`` of
    ``temp_u`` to the interior of ``u``, then exchanges the two buffers.
    ``temp_u`` starts with a single value of 1000.0 at the grid centre.

    Parameters
    ----------
    iter_num : int
        Number of sweeps to perform.

    Returns
    -------
    numpy.ndarray
        The buffer bound to ``u`` after the final exchange. Because the
        exchange happens after each sweep, this is the buffer that was read
        from during the last sweep; with ``iter_num == 0`` it is all zeros.

    Notes
    -----
    Reads the module-level names ``mu``, ``Lx`` and ``Ly``.
    """
    u = np.zeros((Lx, Ly), dtype=np.float64)
    temp_u = np.zeros_like(u)
    # Floor division keeps the index an integer on Python 3 as well.
    temp_u[Lx // 2, Ly // 2] = 1000.0

    for _ in range(iter_num):
        u[1:-1, 1:-1] = mu * (temp_u[2:, 1:-1] + temp_u[:-2, 1:-1] +
                              temp_u[1:-1, 2:] + temp_u[1:-1, :-2] -
                              4 * temp_u[1:-1, 1:-1])

        # Exchange the buffers once per sweep so the next sweep reads the
        # values just written (the swap must live inside the iteration loop).
        u, temp_u = temp_u, u

    return u

# autojit-wrapped counterparts of the two diffusion implementations, bound to
# separate names so the plain and wrapped versions can be timed side by side.
diffuse_array_expressions_jit = autojit(diffuse_array_expressions)
diffuse_loops_jit = autojit(diffuse_loops)

调用函数:

%timeit diffuse_array_expressions(100)
100 loops, best of 3: 13.9 ms per loop

%timeit diffuse_array_expressions_jit(100)
100 loops, best of 3: 14.8 ms per loop

%timeit diffuse_loops(100)
1 loops, best of 3: 1.88 s per loop

%timeit diffuse_loops_jit(100)
1000 loops, best of 3: 1.87 ms per loop

看来,当代码在 Python 中已经以恰当的方式实现时,numba 就失去了它的优势。上面的函数当然也可以不使用循环来实现:

def get_denom_vec(n_comp, qs, x, cp, cs_f):
    """Return the denominator in Equ 14a - 14c (Brooks & Cramer 1992), loop-free.

    Evaluates ``(sigma + z) * k * (qs / cs_f) ** (z - 1) * cp`` element-wise,
    sums the result with ``np.sum`` and adds ``cs_f``.

    Parameters
    ----------
    n_comp : int
        Length of each of the three segments read from ``x``.
    qs, cs_f : scalar or array
        Combined only through ``qs / cs_f``; ``cs_f`` is also added to the sum.
    x : array
        Read as three consecutive segments of length ``n_comp``:
        ``k``, ``sigma`` and ``z`` (in that order). Must support element-wise
        arithmetic, so a plain list will not work here.
    cp : array
        Per-term factor, multiplied element-wise with the other terms.

    Returns
    -------
    ``np.sum`` of the element-wise terms plus ``cs_f``.
    """
    k = x[0:n_comp]
    sigma = x[n_comp:2 * n_comp]
    z = x[2 * n_comp:3 * n_comp]

    # The original read an undefined name `cs`; the parameter is `cs_f`.
    terms = (sigma + z) * (k * (qs / cs_f) ** (z - 1)) * cp
    return np.sum(terms) + cs_f

# autojit-wrapped variant of the loop-free implementation, used in the timings
# that follow.
get_denom_vec_jit = autojit(get_denom_vec)

%timeit get_denom_vec_jit(n_comp_glob,qs_glob,x_glob,cp_glob,cs_glob)
1000 loops, best of 3: 223 µs per loop

%timeit get_denom_vec_jit(n_comp_glob,qs_glob,x_glob,cp_glob,cs_glob)
1000 loops, best of 3: 245 µs per loop

1 个答案:

答案 0 :(得分:4)

numba 很酷的一点是,你可以自己找出原因!

使用

@autojit(nopython=True)

numba --annotate myfile.py

并迭代修复无法嵌入的代码。