Cython没有看到任何加速

时间:2016-08-18 09:16:02

标签: python numpy cython

我定义了一个非常简单的函数ellipse_python():

import numpy as np
    def ellipse_python(array_of_spec_arg,coords_detection, offset_is_true=False, UF=1.):


        nrows=array_of_spec_arg.shape[0]
        Ps=np.empty(shape=(nrows,1))
        offset=0
        if offset_is_true:
            offset=3

        for i in range(0,nrows):
            list_of_spec=array_of_spec_arg[i,:]
            coords_center=np.asarray(list_of_spec[0:2])/UF

            edges_coords=np.asarray(list_of_spec[(offset+3):(offset+11)])
            X=edges_coords[0:4]
            Y=edges_coords[4:8]

            Edges=np.zeros((4,))
            Edges[0] = np.sqrt(np.square(X[0]-X[1]+1)+np.square(Y[0]-Y[1]+1))/UF
            Edges[1] = np.sqrt(np.square(X[1]-X[2]+1)+np.square(Y[1]-Y[2]+1))/UF 
            Edges[2] = np.sqrt(np.square(X[2]-X[3]+1)+np.square(Y[2]-Y[3]+1))/UF
            Edges[3] = np.sqrt(np.square(X[3]-X[0]+1)+np.square(Y[3]-Y[0]+1))/UF


            Edges.sort()
            Edges=Edges[::-1]
            W=np.mean(Edges[0:2])/2
            L=np.mean(Edges[2:4])/2
            alpha=-list_of_spec[2]
            distance=coords_detection-coords_center
            xx=distance[0]
            yy=distance[1]

            x = np.cos(alpha)*xx-np.sin(alpha)*yy;
            y = np.sin(alpha)*xx+np.cos(alpha)*yy;
            P=(np.square(x/W)+np.square(y/L))


            Ps[i]=P


        return Ps  

我经常在我的代码中使用它,使其成为速度的瓶颈所以我使用Cython numpy教程中可以找到的每个技巧对其进行了cython化。 这是我的.pyx的结果:

import numpy as np
cimport numpy as np




from cpython cimport bool


ctypedef np.float64_t dtype_t64
ctypedef np.float32_t dtype_t32

cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)

def ellipse_cython(np.ndarray[dtype_t64, ndim=2] array_of_spec_arg ,np.ndarray[np.int64_t, ndim=1] coords_detection, bool offset_is_true=False, dtype_t32 UF=1.):


    cdef int nrows=array_of_spec_arg.shape[0]
    cdef np.ndarray[dtype_t64,ndim=2] Ps=np.empty(shape=(nrows,1))
    cdef int offset=0        
    cdef np.ndarray[dtype_t64, ndim=1] list_of_spec 
    cdef np.ndarray[dtype_t64, ndim=1] coords_center
    cdef np.ndarray[dtype_t64, ndim=1] edges_coords
    cdef np.ndarray[dtype_t64, ndim=1] X
    cdef np.ndarray[dtype_t64, ndim=1] Y
    cdef np.ndarray[dtype_t64, ndim=1] Edges=np.zeros((4,))
    cdef dtype_t32 W
    cdef dtype_t32 L
    cdef dtype_t32 alpha
    cdef np.ndarray[dtype_t64, ndim=1] distance
    cdef dtype_t32 xx
    cdef dtype_t32 yy
    cdef dtype_t32 x
    cdef dtype_t32 y
    cdef dtype_t32 P

    if offset_is_true:
        offset=3

    for i in xrange(nrows):
        list_of_spec=array_of_spec_arg[i,:]
        coords_center=np.asarray(list_of_spec[0:2])/UF    
        edges_coords=np.asarray(list_of_spec[(offset+3):(offset+11)])
        X=edges_coords[0:4]
        Y=edges_coords[4:8]


        Edges[0] = np.sqrt(np.square(X[0]-X[1]+1)+np.square(Y[0]-Y[1]+1))/UF
        Edges[1] = np.sqrt(np.square(X[1]-X[2]+1)+np.square(Y[1]-Y[2]+1))/UF 
        Edges[2] = np.sqrt(np.square(X[2]-X[3]+1)+np.square(Y[2]-Y[3]+1))/UF
        Edges[3] = np.sqrt(np.square(X[3]-X[0]+1)+np.square(Y[3]-Y[0]+1))/UF


        Edges.sort()
        Edges=Edges[::-1]
        W=np.mean(Edges[0:2])/2.
        L=np.mean(Edges[2:4])/2.
        alpha=-list_of_spec[2]
        distance=coords_detection-coords_center
        xx=distance[0]
        yy=distance[1]

        x = np.cos(alpha)*xx-np.sin(alpha)*yy;
        y = np.sin(alpha)*xx+np.cos(alpha)*yy;
        P=(np.square(x/W)+np.square(y/L))


        Ps[i]=P


    return Ps

如您所见,指定了每种类型并禁用了每项检查。 但是,当我计算两个功能时,我没有得到任何加速。 导入numpy为np

import ellipse_cython
import ellipse_python
width=45
cropped_window_centers=np.random.randint(width+1,1024-(width+1),(10,2))
array_of_spec=np.array([[34.372817, 220.59854,-1.496308,    1.  , 1. ,0.,26., 45., 39.,27., 203.,204.,240.,234.],[  34.811641,269.966265,   -1.558297,    1.  ,1.,0. ,26.,44.,43. ,26., 249.,251.,293.,287.]])
%timeit -n2 -r10 ellipse_python.ellipse_python(array_of_spec,cropped_window_centers[0], True, 4.)
%timeit -n2 -r10 ellipse_cython.ellipse_cython(array_of_spec,cropped_window_centers[0], True, 4.)


2 loops, best of 10: 2.42 ms per loop
2 loops, best of 10: 2.42 ms per loop

我这样做了吗?有没有什么可以提高速度?或者我应该对从numpy得到的东西感到满意。

0 个答案:

没有答案
相关问题