以下是两个代码示例
def hough_transform_1(active_points, size_trame, size_max_song):
m = np.linspace(0.95, 1.05, 11)
p = np.linspace(-size_trame, size_max_song, size_max_song + size_trame + 1)
acc = np.zeros([m.size, p.size])
for m_i in m:
for x_i, y_i in active_points:
p_i = y_i - m_i * x_i + size_trame
if p_i >= 0 and p_i < p.size:
acc[m_i * 100 - 95, p_i] += 1
#Return some value
DTYPE_INT = np.int
DTYPE_FLOAT = np.float
ctypedef np.int_t DTYPE_INT_t
ctypedef np.float_t DTYPE_FLOAT_t
@cython.boundscheck(False)
def hough_transform_2(np.ndarray[DTYPE_FLOAT_t, ndim=2] activepoints, sizetrame, sizemaxsong):
cdef size_trame = sizetrame
cdef size_max_song = sizemaxsong
cdef np.ndarray[DTYPE_FLOAT_t, ndim=2] active_points = activepoints
cdef DTYPE_FLOAT_t x_i, y_i, m_i, p_i
cdef float best_transformed
cdef np.ndarray[DTYPE_FLOAT_t, ndim=1] m = np.linspace(0.95, 1.05, 11).astype(DTYPE_FLOAT)
cdef np.ndarray[DTYPE_INT_t, ndim=2] acc = np.zeros([m.size, size_max_song + size_trame + 1], dtype=DTYPE_INT)
cdef int i_range = m.size
cdef int j_range = active_points.shape[0]
for i in range(i_range):
m_i = m[i]
for j in range(j_range):
x_i = active_points[j][0]
x_i = active_points[j][1]
p_i = y_i - m_i * x_i + size_trame
if p_i >= 0 and p_i < size_max_song + size_trame + 1:
acc[m_i * 100 - 95, p_i] += 1
#Return some value
这两个函数(在给定(x,y)输入点列表的情况下检测斜率介于0.95和1.05之间的行)是等效的,但第二个使用cython优化。
用(x为1或2)测试他们的速度
time1 = time.time()
for _ in range(100):
hough_transform_x(points, self.length, self.length)
time2 = time.time()
我得到了那些结果:
35s for hough_transform_1; 20分钟为hough_transform_2
由于在这种类型的函数上使用Cython应该会产生更显着的加速(我预期100次而不是1.75次),我认为我的cythonized代码出了问题,但无法检测到它。我错过了什么?
答案 0 :(得分:3)
首先,键入所有内容。其次,实际输入它们。
这些都没有输入(!),应该在参数列表中输入:
cdef size_trame = sizetrame
cdef size_max_song = sizemaxsong
这是多余的:
cdef np.ndarray[DTYPE_FLOAT_t, ndim=2] active_points = activepoints
这很好:
cdef DTYPE_FLOAT_t x_i, y_i, m_i, p_i
你不能使用它:
cdef float best_transformed
这可能会更好地硬编码为C数组(DTYPE_FLOAT_t[11]
):
cdef np.ndarray[DTYPE_FLOAT_t, ndim=1] m = np.linspace(0.95, 1.05, 11).astype(DTYPE_FLOAT)
这些都很好:
cdef np.ndarray[DTYPE_INT_t, ndim=2] acc = np.zeros([m.size, size_max_song + size_trame + 1], dtype=DTYPE_INT)
cdef int i_range = m.size
cdef int j_range = active_points.shape[0]
i
是无类型的:
for i in range(i_range):
m_i = m[i]
j
是无类型的:
for j in range(j_range):
这毫无意义:
x_i = active_points[j][0]
您想要active_points[j, 1]
:
x_i = active_points[j][1]
p_i = y_i - m_i * x_i + size_trame
0 <= pi < size_max_song + size_trame + 1
if p_i >= 0 and p_i < size_max_song + size_trame + 1:
acc[m_i * 100 - 95, p_i] += 1