我定义了一个非常简单的函数ellipse_python():
import numpy as np
def ellipse_python(array_of_spec_arg,coords_detection, offset_is_true=False, UF=1.):
nrows=array_of_spec_arg.shape[0]
Ps=np.empty(shape=(nrows,1))
offset=0
if offset_is_true:
offset=3
for i in range(0,nrows):
list_of_spec=array_of_spec_arg[i,:]
coords_center=np.asarray(list_of_spec[0:2])/UF
edges_coords=np.asarray(list_of_spec[(offset+3):(offset+11)])
X=edges_coords[0:4]
Y=edges_coords[4:8]
Edges=np.zeros((4,))
Edges[0] = np.sqrt(np.square(X[0]-X[1]+1)+np.square(Y[0]-Y[1]+1))/UF
Edges[1] = np.sqrt(np.square(X[1]-X[2]+1)+np.square(Y[1]-Y[2]+1))/UF
Edges[2] = np.sqrt(np.square(X[2]-X[3]+1)+np.square(Y[2]-Y[3]+1))/UF
Edges[3] = np.sqrt(np.square(X[3]-X[0]+1)+np.square(Y[3]-Y[0]+1))/UF
Edges.sort()
Edges=Edges[::-1]
W=np.mean(Edges[0:2])/2
L=np.mean(Edges[2:4])/2
alpha=-list_of_spec[2]
distance=coords_detection-coords_center
xx=distance[0]
yy=distance[1]
x = np.cos(alpha)*xx-np.sin(alpha)*yy;
y = np.sin(alpha)*xx+np.cos(alpha)*yy;
P=(np.square(x/W)+np.square(y/L))
Ps[i]=P
return Ps
我经常在我的代码中使用它,使其成为速度的瓶颈所以我使用Cython numpy教程中可以找到的每个技巧对其进行了cython化。 这是我的.pyx的结果:
import numpy as np
cimport numpy as np
from cpython cimport bool
ctypedef np.float64_t dtype_t64
ctypedef np.float32_t dtype_t32
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
def ellipse_cython(np.ndarray[dtype_t64, ndim=2] array_of_spec_arg ,np.ndarray[np.int64_t, ndim=1] coords_detection, bool offset_is_true=False, dtype_t32 UF=1.):
cdef int nrows=array_of_spec_arg.shape[0]
cdef np.ndarray[dtype_t64,ndim=2] Ps=np.empty(shape=(nrows,1))
cdef int offset=0
cdef np.ndarray[dtype_t64, ndim=1] list_of_spec
cdef np.ndarray[dtype_t64, ndim=1] coords_center
cdef np.ndarray[dtype_t64, ndim=1] edges_coords
cdef np.ndarray[dtype_t64, ndim=1] X
cdef np.ndarray[dtype_t64, ndim=1] Y
cdef np.ndarray[dtype_t64, ndim=1] Edges=np.zeros((4,))
cdef dtype_t32 W
cdef dtype_t32 L
cdef dtype_t32 alpha
cdef np.ndarray[dtype_t64, ndim=1] distance
cdef dtype_t32 xx
cdef dtype_t32 yy
cdef dtype_t32 x
cdef dtype_t32 y
cdef dtype_t32 P
if offset_is_true:
offset=3
for i in xrange(nrows):
list_of_spec=array_of_spec_arg[i,:]
coords_center=np.asarray(list_of_spec[0:2])/UF
edges_coords=np.asarray(list_of_spec[(offset+3):(offset+11)])
X=edges_coords[0:4]
Y=edges_coords[4:8]
Edges[0] = np.sqrt(np.square(X[0]-X[1]+1)+np.square(Y[0]-Y[1]+1))/UF
Edges[1] = np.sqrt(np.square(X[1]-X[2]+1)+np.square(Y[1]-Y[2]+1))/UF
Edges[2] = np.sqrt(np.square(X[2]-X[3]+1)+np.square(Y[2]-Y[3]+1))/UF
Edges[3] = np.sqrt(np.square(X[3]-X[0]+1)+np.square(Y[3]-Y[0]+1))/UF
Edges.sort()
Edges=Edges[::-1]
W=np.mean(Edges[0:2])/2.
L=np.mean(Edges[2:4])/2.
alpha=-list_of_spec[2]
distance=coords_detection-coords_center
xx=distance[0]
yy=distance[1]
x = np.cos(alpha)*xx-np.sin(alpha)*yy;
y = np.sin(alpha)*xx+np.cos(alpha)*yy;
P=(np.square(x/W)+np.square(y/L))
Ps[i]=P
return Ps
如您所见,指定了每种类型并禁用了每项检查。 但是,当我计算两个功能时,我没有得到任何加速。 导入numpy为np
import ellipse_cython
import ellipse_python
width=45
cropped_window_centers=np.random.randint(width+1,1024-(width+1),(10,2))
array_of_spec=np.array([[34.372817, 220.59854,-1.496308, 1. , 1. ,0.,26., 45., 39.,27., 203.,204.,240.,234.],[ 34.811641,269.966265, -1.558297, 1. ,1.,0. ,26.,44.,43. ,26., 249.,251.,293.,287.]])
%timeit -n2 -r10 ellipse_python.ellipse_python(array_of_spec,cropped_window_centers[0], True, 4.)
%timeit -n2 -r10 ellipse_cython.ellipse_cython(array_of_spec,cropped_window_centers[0], True, 4.)
2 loops, best of 10: 2.42 ms per loop
2 loops, best of 10: 2.42 ms per loop
我这样做了吗?有没有什么可以提高速度?或者我应该对从numpy得到的东西感到满意。