按不同的权重组合大型数组,逻辑上很简单,但非常耗时。我想用 Cython 来加速,于是编写了下面的函数,结果发现性能不但没有提升,甚至比 numpy 版本还慢。请问问题出在哪里?谢谢。
Cython 版本:
%%cython
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import prange
from libc.math cimport fabs
@cython.boundscheck(False)
@cython.wraparound(False)
def merge(const float[:, ::1] pop, const float[:, :, ::1] dat):
    """Combine the planes of ``dat`` into one weighted product per row of ``pop``.

    For every row ``pi`` of ``pop`` the result is::

        out[pi, x, y] = product over fi of (dat[fi, x, y] * pop[pi, fi] + 1)

    Parameters
    ----------
    pop : C-contiguous float32 buffer, shape (pop_row, pop_col)
        One row of weights per output plane.
    dat : C-contiguous float32 buffer, shape (n, dat_row, dat_col)
        Stack of planes. Only the first ``pop_col`` planes are read, so
        ``n`` must be at least ``pop_col``.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (pop_row, dat_row, dat_col).

    Raises
    ------
    ValueError
        If ``dat`` has fewer planes than ``pop`` has columns.

    Notes
    -----
    ``prange`` only runs in parallel when the module is compiled and linked
    with OpenMP enabled (for example
    ``%%cython --compile-args=-fopenmp --link-args=-fopenmp`` with GCC);
    without those flags the outer loop executes serially.

    NOTE(review): ``numpy_merge`` subtracts 1 from its result before
    returning it; this function returns the raw product. Confirm which of
    the two is intended before comparing their outputs.
    """
    cdef Py_ssize_t pop_row = pop.shape[0]
    cdef Py_ssize_t pop_col = pop.shape[1]
    cdef Py_ssize_t dat_row = dat.shape[1]
    cdef Py_ssize_t dat_col = dat.shape[2]
    cdef Py_ssize_t pi, fi, xi, yi
    cdef float w

    # Bounds checking is switched off for this function, so an undersized
    # ``dat`` would be read past its end instead of raising IndexError.
    if dat.shape[0] < pop_col:
        raise ValueError(
            "dat has {} planes but pop has {} weight columns".format(
                dat.shape[0], pop_col
            )
        )

    out = np.ones((pop_row, dat_row, dat_col), dtype=np.float32)
    cdef float[:, :, ::1] out_view = out

    for pi in prange(pop_row, nogil=True):
        # The weight loop sits inside the row loop so that one output row is
        # updated by every weight while it is still in cache, rather than
        # streaming the whole output plane once per weight. Each element is
        # still multiplied in ascending ``fi`` order, so results are unchanged.
        for xi in range(dat_row):
            for fi in range(pop_col):
                w = pop[pi, fi]  # invariant over the innermost loop
                for yi in range(dat_col):
                    out_view[pi, xi, yi] *= dat[fi, xi, yi] * w + 1
    return out
numpy版本:
def numpy_merge(pop, dat):
    """Return, for each row of ``pop``, a weighted product of ``dat`` planes minus 1.

    For row ``i`` the result is the product, over every index ``k`` where
    ``pop[i][k]`` is non-zero, of ``dat[k] * pop[i][k] + 1``, with 1
    subtracted from the finished array. Zero weights are skipped.

    The output has the dtype of ``dat`` and the shape of ``dat`` with its
    first dimension replaced by ``len(pop)``.
    """
    out_shape = list(dat.shape)
    out_shape[0] = len(pop)
    result = np.full(out_shape, 1, dtype=dat.dtype)

    for row_idx, weights in enumerate(pop):
        # Only non-zero weights contribute a factor.
        active = np.nonzero(weights)[0]
        for k in active:
            factor = dat[k] * weights[k] + 1
            result[row_idx] *= factor

    result -= 1
    return result
测试:
# Benchmark inputs, both cast to float32 (the element type merge's typed
# arguments require): a 100x100 weight matrix drawn from [-1, 1) and a
# 100x100x1000 data stack drawn from [0, 1).
pop = np.random.uniform(-1,1,size=(100,100)).astype(np.float32)
dat = np.random.uniform( 0,1,size=(100,100,1000)).astype(np.float32)
# Time both implementations on the same inputs with IPython's %timeit magic.
# NOTE(review): numpy_merge subtracts 1 from its result and skips zero
# weights, while merge does neither, so the two calls do not return
# identical arrays.
%timeit merge(pop, dat)
%timeit numpy_merge(pop,dat)