我想用 Cython 加速 Sutherland-Hodgman 算法的一个 Python 实现。该算法根据非常简单的规则(顶点位于裁剪边的内侧还是外侧等)更新顶点列表,但细节并不重要。下面是 Python 版本,它接受按顺时针方向排列的多边形顶点列表。例如:
sP=[(50, 150), (200, 50), (350, 150), (350, 300), (250, 300), (200, 250), (150, 350),(100, 250), (100, 200)]
cP=[(100, 100), (300, 100), (300, 300), (100, 300)]
并计算他们的交集:
inter=clip(sP, cP)
这是在rosettacode上找到的代码,稍微修改一下,如果没有交集则返回一个空列表。
def clip(subjectPolygon, clipPolygon):
    """Clip ``subjectPolygon`` against ``clipPolygon`` (Sutherland-Hodgman).

    Both arguments are sequences of ``(x, y)`` vertices.  The clip polygon is
    processed edge by edge (``cp1`` -> ``cp2``); a vertex is kept when the
    cross-product test in ``inside`` is strictly positive for that edge, so
    the result depends on the winding order in which the vertices are given.

    Returns a list of vertices.  Subject vertices that survive are returned
    as passed in; computed intersection points are ``[x, y]`` lists of
    floats.  An empty list is returned when nothing survives clipping or
    when either polygon has no vertices.
    """

    def inside(p):
        # Sign of the cross product (cp2 - cp1) x (p - cp1).
        return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0])

    def computeIntersection():
        # Determinant formula for the crossing of line cp1-cp2 with line s-e.
        dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]]
        dp = [s[0] - e[0], s[1] - e[1]]
        n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
        n2 = s[0] * e[1] - s[1] * e[0]
        n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
        return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3]

    # The [-1] lookups below would raise IndexError on an empty polygon.
    # len() is used instead of truthiness so array-like inputs work too.
    if len(subjectPolygon) < 1 or len(clipPolygon) < 1:
        return []

    outputList = subjectPolygon
    cp1 = clipPolygon[-1]

    for clipVertex in clipPolygon:
        cp2 = clipVertex
        inputList = outputList
        outputList = []
        s = inputList[-1]

        for subjectVertex in inputList:
            e = subjectVertex
            if inside(e):
                if not inside(s):
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection())
                outputList.append(e)
            elif inside(s):
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection())
            s = e

        # Nothing left after this edge, so there is no intersection at all.
        if len(outputList) < 1:
            return []
        cp1 = cp2

    return outputList
这个函数对于我的应用来说太慢了,所以我尝试借助 numpy 把它 Cython 化。下面是我的 Cython 版本。我不得不把两个辅助函数定义在 clip 之外,否则会出现与缓冲区(buffer)类型参数有关的错误信息。
cython1
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon, np.ndarray[np.float32_t, ndim=2] clipPolygon):
    """Clip ``subjectPolygon`` against ``clipPolygon`` edge by edge.

    Both arguments are 2-D float32 arrays with one vertex per row.  Returns a
    Python list of 1-D float32 arrays (rows of the subject polygon and
    intersection points from ``computeIntersection``), or ``[]`` when
    nothing survives clipping or when either polygon has no rows.
    """
    cdef np.ndarray[np.float32_t, ndim=1] cp1
    cdef np.ndarray[np.float32_t, ndim=1] cp2
    # Typed index so the range() loop below can compile to a plain C loop.
    cdef Py_ssize_t i

    # The [-1] lookups below would raise IndexError on an empty polygon.
    if subjectPolygon.shape[0] < 1 or clipPolygon.shape[0] < 1:
        return []

    outputList = list(subjectPolygon)
    cp1 = clipPolygon[-1, :]

    for i in range(clipPolygon.shape[0]):
        cp2 = clipPolygon[i]
        inputList = outputList
        outputList = []
        s = inputList[-1]

        for subjectVertex in inputList:
            e = subjectVertex
            if inside(e, cp1, cp2):
                if not inside(s, cp1, cp2):
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection(cp1, cp2, e, s))
                outputList.append(e)
            elif inside(s, cp1, cp2):
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection(cp1, cp2, e, s))
            s = e

        if len(outputList) < 1:
            return []
        cp1 = cp2

    return outputList
def computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    """Return the crossing of line ``cp1``-``cp2`` with line ``s``-``e``.

    All points are 1-D float32 arrays ``[x, y]``; the result is a new
    float32 array of the same form, computed with the determinant formula.
    The denominator is zero when the two directions are parallel.
    """
    cdef np.ndarray[np.float32_t, ndim=1] clip_delta = cp1 - cp2
    cdef np.ndarray[np.float32_t, ndim=1] seg_delta = s - e
    cdef np.float32_t clip_cross = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef np.float32_t seg_cross = s[0] * e[1] - s[1] * e[0]
    cdef np.float32_t inv_det = 1.0 / (clip_delta[0] * seg_delta[1] - clip_delta[1] * seg_delta[0])
    cdef np.ndarray[np.float32_t, ndim=1] point = np.array(
        [
            (clip_cross * seg_delta[0] - seg_cross * clip_delta[0]) * inv_det,
            (clip_cross * seg_delta[1] - seg_cross * clip_delta[1]) * inv_det,
        ],
        dtype=np.float32,
    )
    return point
def inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
    """Return True when ``p`` is strictly on the kept side of edge ``cp1``-``cp2``.

    The test is the sign of the cross product (cp2 - cp1) x (p - cp1),
    written as a comparison of its two terms.
    """
    cdef bint on_kept_side = (
        (cp2[0] - cp1[0]) * (p[1] - cp1[1])
        > (cp2[1] - cp1[1]) * (p[0] - cp1[0])
    )
    return on_kept_side
当我计算两个版本时,我在加速时只获得了两倍,我需要至少10倍(或100倍!)。有什么事可做吗? 如何使用Cython处理列表?
编辑1:我采纳了 @DavidW 的建议:预先分配 numpy 数组并对其进行裁剪,而不是使用列表;同时改用 cdef 函数,这本应带来 10 倍的加速。不幸的是,我没有看到任何加速!
cython2
cimport cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon, np.ndarray[np.float32_t, ndim=2] clipPolygon):
    """Python-callable entry point: forward both polygons to ``clip_in_c``.

    Takes two 2-D float32 arrays and returns whatever ``clip_in_c`` returns.
    """
    clipped = clip_in_c(subjectPolygon, clipPolygon)
    return clipped
@cython.boundscheck(False)
@cython.wraparound(False)
cdef np.ndarray[np.float32_t, ndim=2] clip_in_c(np.ndarray[np.float32_t, ndim=2] subjectPolygon, np.ndarray[np.float32_t, ndim=2] clipPolygon):
    """Clip ``subjectPolygon`` against ``clipPolygon`` using preallocated buffers.

    Both arguments are 2-D float32 arrays with one vertex per row.  Two work
    arrays of ``subjectPolygon.shape[0] * clipPolygon.shape[0]`` rows are
    allocated once, and only their first ``*_effective_size`` rows are
    meaningful at any time.

    Returns the leading rows of the output buffer holding the clipped
    polygon.  Returns an empty ``(0, 0)`` float32 array when nothing
    survives clipping or when either polygon has no rows.
    """
    cdef int cp_size = clipPolygon.shape[0]
    cdef int outputList_effective_size = subjectPolygon.shape[0]
    cdef int inputList_effective_size = outputList_effective_size
    # Fixed number of rows for both working buffers.
    cdef int max_size_inter = outputList_effective_size * cp_size
    # Index of the last row written to outputList for the current clip edge.
    cdef int k
    # Typed indices so the range() loops below can compile to plain C loops.
    cdef Py_ssize_t i, j
    cdef np.ndarray[np.float32_t, ndim=2] outputList
    cdef np.ndarray[np.float32_t, ndim=2] inputList
    cdef np.ndarray[np.float32_t, ndim=1] cp1
    cdef np.ndarray[np.float32_t, ndim=1] cp2

    # The "last row" lookups below would fail on an empty polygon, so return
    # the same empty result as the no-intersection case.
    if cp_size < 1 or outputList_effective_size < 1:
        return np.empty((0, 0), dtype=np.float32)

    outputList = np.empty((max_size_inter, 2), dtype=np.float32)
    inputList = np.empty((max_size_inter, 2), dtype=np.float32)
    cp1 = clipPolygon[cp_size - 1, :]
    outputList[:outputList_effective_size] = subjectPolygon

    for i in range(cp_size):
        cp2 = clipPolygon[i]
        # The previous pass's output becomes this pass's input.
        inputList[:outputList_effective_size] = outputList[:outputList_effective_size]
        inputList_effective_size = outputList_effective_size
        k = -1
        s = inputList[inputList_effective_size - 1]

        for j in range(inputList_effective_size):
            e = inputList[j]
            if inside(e, cp1, cp2):
                if not inside(s, cp1, cp2):
                    # Entering the kept side: emit the crossing point first.
                    k += 1
                    outputList[k] = computeIntersection(cp1, cp2, e, s)
                k += 1
                outputList[k] = e
            elif inside(s, cp1, cp2):
                # Leaving the kept side: emit only the crossing point.
                k += 1
                outputList[k] = computeIntersection(cp1, cp2, e, s)
            s = e

        # No row written for this edge: the polygons do not intersect.
        if k < 0:
            return np.empty((0, 0), dtype=np.float32)
        outputList_effective_size = k + 1
        cp1 = cp2

    return outputList[:outputList_effective_size]
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    """Return the crossing of line ``cp1``-``cp2`` with line ``s``-``e``.

    All points are 1-D float32 arrays ``[x, y]``; the result is a newly
    allocated float32 array of the same form (determinant formula).
    """
    cdef np.ndarray[np.float32_t, ndim=1] clip_delta = cp1 - cp2
    cdef np.ndarray[np.float32_t, ndim=1] seg_delta = s - e
    cdef np.float32_t clip_cross = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef np.float32_t seg_cross = s[0] * e[1] - s[1] * e[0]
    cdef np.float32_t inv_det = 1.0 / (clip_delta[0] * seg_delta[1] - clip_delta[1] * seg_delta[0])
    return np.array(
        [
            (clip_cross * seg_delta[0] - seg_cross * clip_delta[0]) * inv_det,
            (clip_cross * seg_delta[1] - seg_cross * clip_delta[1]) * inv_det,
        ],
        dtype=np.float32,
    )
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
    """Return True when ``p`` is strictly on the kept side of edge ``cp1``-``cp2``.

    Compares the two terms of the cross product (cp2 - cp1) x (p - cp1).
    """
    cdef bint on_kept_side = (
        (cp2[0] - cp1[0]) * (p[1] - cp1[1])
        > (cp2[1] - cp1[1]) * (p[0] - cp1[0])
    )
    return on_kept_side
这是基准:
import time

import numpy as np

# Both extension modules expose their entry point as ``clip``; alias the two
# so the implementations can be told apart in this script.
from cython1 import clip as clip_cython1
from cython2 import clip as clip_cython2

# Number of clip() calls timed per implementation.
N_CALLS = 120000

sp = np.array([[50, 150], [200, 50], [350, 150], [250, 300], [200, 250], [150, 350], [100, 250], [100, 200]], dtype=np.float32)
cp = np.array([[100, 100], [300, 100], [300, 300], [100, 300]], dtype=np.float32)


def time_clip(clip_func, n_calls=N_CALLS):
    """Return the wall-clock seconds spent on ``n_calls`` calls of ``clip_func(sp, cp)``."""
    start = time.time()
    for _ in range(n_calls):
        clip_func(sp, cp)
    return time.time() - start


print(time_clip(clip_cython1))
print(time_clip(clip_cython2))
39.45
44.12
第二个更糟糕!
编辑2 来自CodeReview的@Peter Taylor的最佳答案使用了这样一个事实:每次计算inside_s时它都是多余的,因为s = e并且你已经计算了inside_e(以及分解dc和n1在功能之外但它没有多大帮助。)
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon, np.ndarray[np.float32_t, ndim=2] clipPolygon):
    """Clip ``subjectPolygon`` against ``clipPolygon``, reusing per-edge work.

    Both arguments are 2-D float32 arrays with one vertex per row.  For each
    clip edge the edge delta ``dc`` and cross term ``n1`` are computed once,
    and the inside test of the previous vertex is carried over instead of
    being recomputed.

    Returns a Python list of 1-D float32 arrays, or ``[]`` when nothing
    survives clipping or when either polygon has no rows.
    """
    cdef np.ndarray[np.float32_t, ndim=1] cp1
    cdef np.ndarray[np.float32_t, ndim=1] cp2
    cdef bint inside_e, inside_s
    cdef np.float32_t n1
    cdef np.ndarray[np.float32_t, ndim=1] dc
    cdef int i

    # The [-1] lookups below would raise IndexError on an empty polygon.
    if subjectPolygon.shape[0] < 1 or clipPolygon.shape[0] < 1:
        return []

    outputList = list(subjectPolygon)
    cp1 = clipPolygon[-1, :]

    for i in range(clipPolygon.shape[0]):
        cp2 = clipPolygon[i]
        # Per-edge intermediates shared by every vertex tested on this edge.
        n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
        dc = cp1 - cp2
        inputList = outputList
        outputList = []
        s = inputList[-1]
        inside_s = inside(s, cp1, dc)

        for e in inputList:
            inside_e = inside(e, cp1, dc)
            if inside_e:
                if not inside_s:
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection(dc, n1, e, s))
                outputList.append(e)
            elif inside_s:
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection(dc, n1, e, s))
            # The current vertex becomes the next segment's start point.
            s = e
            inside_s = inside_e

        if len(outputList) < 1:
            return []
        cp1 = cp2

    return outputList
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] dc, np.float32_t n1, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    """Return the crossing of the current clip edge with line ``s``-``e``.

    ``dc`` and ``n1`` are the clip-edge delta and cross term supplied by the
    caller; ``s`` and ``e`` are 1-D float32 points.  The result is a newly
    allocated float32 array ``[x, y]``.
    """
    cdef np.ndarray[np.float32_t, ndim=1] seg_delta = s - e
    cdef np.float32_t seg_cross = s[0] * e[1] - s[1] * e[0]
    cdef np.float32_t inv_det = 1.0 / (dc[0] * seg_delta[1] - dc[1] * seg_delta[0])
    return np.array(
        [
            (n1 * seg_delta[0] - seg_cross * dc[0]) * inv_det,
            (n1 * seg_delta[1] - seg_cross * dc[1]) * inv_det,
        ],
        dtype=np.float32,
    )
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] dc):
    """Return True when ``p`` is strictly on the kept side of the clip edge.

    ``dc`` is passed as ``cp1 - cp2`` by the caller, hence the negations.
    """
    cdef bint on_kept_side = (
        (-dc[0]) * (p[1] - cp1[1])
        > (-dc[1]) * (p[0] - cp1[0])
    )
    return on_kept_side
把两个版本结合起来(只使用 numpy 数组,再加上 @Peter Taylor 的技巧)反而稍慢一些。我不明白为什么——可能是因为必须分配一个大小为 sP.shape[0] * cp.shape[0] 的长数组?
答案 0 :(得分:2)
在弄乱你的Cython代码后,我觉得找到你的库已经在其他地方实现的方式更容易了,所以查看scikit-image版本只是几行Numpy代码和你正在寻找的算法来自matplotlib:
import numpy as np
from matplotlib import path, transforms
def polygon_clip(rp, cp, r0, c0, r1, c1):
    """Clip a polygon to the given bounding box.

    Parameters
    ----------
    rp, cp : (N,) ndarray of double
        Row and column coordinates of the polygon.
    (r0, c0), (r1, c1) : double
        Top-left and bottom-right coordinates of the bounding box.

    Returns
    -------
    r_clipped, c_clipped : (M,) ndarray of double
        Coordinates of clipped polygon.  Both arrays are empty when
        clipping yields no polygon.

    Notes
    -----
    This makes use of Sutherland-Hodgman clipping as implemented in
    AGG 2.4 and exposed in Matplotlib.
    """
    poly = path.Path(np.vstack((rp, cp)).T, closed=True)
    clip_rect = transforms.Bbox([[r0, c0], [r1, c1]])
    polygons = poly.clip_to_bbox(clip_rect).to_polygons()

    # Guard instead of indexing [0] blindly: an empty list here (presumably
    # when the polygon lies wholly outside the box) would raise IndexError.
    if len(polygons) < 1:
        return np.empty(0, dtype=float), np.empty(0, dtype=float)
    poly_clipped = polygons[0]

    # This should be fixed in matplotlib >1.5
    # Drop a duplicated closing vertex; the length check keeps the [-2]
    # lookup valid for a one-vertex result.
    if len(poly_clipped) > 1 and np.all(poly_clipped[-1] == poly_clipped[-2]):
        poly_clipped = poly_clipped[:-1]

    return poly_clipped[:, 0], poly_clipped[:, 1]
如果没有别的,上面应该更容易转换为Cython。
[更新] 参考另一个 Cython 答案的分析,可以试试 pyclipper 这个包(https://pypi.python.org/pypi/pyclipper),它把用 C++ 实现的多边形裁剪封装成了 Python 接口。用法:
import pyclipper

# Subject: an outer rectangle plus a triangle, given as two separate paths.
subj = (
    ((180, 200), (260, 200), (260, 150), (180, 150)),
    ((215, 160), (230, 190), (200, 190)),
)
# Clip region: a single rectangular path.
clip = ((190, 210), (240, 210), (240, 130), (190, 130))

pc = pyclipper.Pyclipper()
pc.AddPath(clip, pyclipper.PT_CLIP, True)
pc.AddPaths(subj, pyclipper.PT_SUBJECT, True)

solution = pc.Execute(pyclipper.CT_INTERSECTION, pyclipper.PFT_EVENODD, pyclipper.PFT_EVENODD)
顺便一提,在我那台性能很差的 AMD PC 上,上面这段代码的速度与另一个答案中较快的 Cython 代码大致相同,约为 9 µs。
答案 1 :(得分:2)
我加速了15倍:
In [12]: timeit clippy.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 126 µs per loop
In [13]: timeit clippy.clip1(clippy.sP, clippy.cP)
10000 loops, best of 3: 75.9 µs per loop
In [14]: timeit myclip.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 47.1 µs per loop
In [15]: timeit myclip.clip1(clippy.sP, clippy.cP)
100000 loops, best of 3: 8.2 µs per loop
clippy.clip
是您原来的功能。
clippy.clip1
也是Python,但用元组解包取代了大部分列表索引。
def clip1(subjectPolygon, clipPolygon):
    """Clip ``subjectPolygon`` against ``clipPolygon`` using unpacked coordinates.

    Same algorithm as ``clip``, but every vertex is unpacked into scalar
    ``x``/``y`` names up front instead of being indexed repeatedly, and the
    inside test of the previous vertex is carried over between iterations.

    Both arguments are sequences of two-element vertices.  Returns a list in
    which surviving subject vertices are ``(x, y)`` tuples and intersection
    points are ``[x, y]`` lists of floats.  Returns ``[]`` when nothing
    survives clipping or when either polygon has no vertices.
    """

    def inside(p0, p1):
        # Sign of the cross product (cp2 - cp1) x (p - cp1).
        return (cp20 - cp10) * (p1 - cp11) > (cp21 - cp11) * (p0 - cp10)

    def computeIntersection():
        # Determinant formula for the crossing of line cp1-cp2 with line s-e.
        dc0, dc1 = cp10 - cp20, cp11 - cp21
        dp0, dp1 = s0 - e0, s1 - e1
        n1 = cp10 * cp21 - cp11 * cp20
        n2 = s0 * e1 - s1 * e0
        n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
        return [(n1 * dp0 - n2 * dc0) * n3, (n1 * dp1 - n2 * dc1) * n3]

    # The [-1] lookups below would raise IndexError on an empty polygon.
    # len() is used instead of truthiness so array-like inputs work too.
    if len(subjectPolygon) < 1 or len(clipPolygon) < 1:
        return []

    outputList = subjectPolygon
    cp10, cp11 = clipPolygon[-1]

    for cp20, cp21 in clipPolygon:
        inputList = outputList
        outputList = []
        s0, s1 = inputList[-1]
        s_in = inside(s0, s1)

        for e0, e1 in inputList:
            e_in = inside(e0, e1)
            if e_in:
                if not s_in:
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection())
                outputList.append((e0, e1))
            elif s_in:
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection())
            # The current vertex becomes the next segment's start point.
            s0, s1, s_in = e0, e1, e_in

        if len(outputList) < 1:
            return []
        cp10, cp11 = cp20, cp21

    return outputList
myclip.clip
是原始cythonized
;仍在使用列表,而不是数组。
myclip.clip1
是第二个cythonized
:
cdef computeIntersection1(double cp10, double cp11, double cp20, double cp21,double s0, double s1,double e0, double e1):
    """Return ``(x, y)`` where line cp1-cp2 crosses line s-e.

    Every point is passed as two scalar doubles; the result is a tuple
    computed with the determinant formula.
    """
    clip_dx = cp10 - cp20
    clip_dy = cp11 - cp21
    seg_dx = s0 - e0
    seg_dy = s1 - e1
    clip_cross = cp10 * cp21 - cp11 * cp20
    seg_cross = s0 * e1 - s1 * e0
    inv_det = 1.0 / (clip_dx * seg_dy - clip_dy * seg_dx)
    x = (clip_cross * seg_dx - seg_cross * clip_dx) * inv_det
    y = (clip_cross * seg_dy - seg_cross * clip_dy) * inv_det
    return x, y
cdef cclip1(subjectPolygon, clipPolygon):
    """Clip ``subjectPolygon`` against ``clipPolygon`` with C-typed scalars.

    Both arguments are Python sequences of two-element vertices.  All
    coordinates are unpacked into C doubles.  The inside test is kept as a
    signed value (``s_in`` / ``e_in``) whose sign is checked, so the value
    for the previous vertex is reused on the next iteration.

    Returns a list of ``(x, y)`` tuples, or ``[]`` when nothing survives
    clipping or when either polygon has no vertices.
    """
    cdef double cp10, cp11, cp20, cp21
    cdef double s0, s1, e0, e1
    cdef double s_in, e_in

    # The [-1] lookups below would raise IndexError on an empty polygon.
    if len(subjectPolygon) < 1 or len(clipPolygon) < 1:
        return []

    outputList = subjectPolygon
    cp10, cp11 = clipPolygon[-1]

    for cp20, cp21 in clipPolygon:
        inputList = outputList
        outputList = []
        s0, s1 = inputList[-1]
        # Cross product (cp2 - cp1) x (s - cp1); positive means kept side.
        s_in = (cp20 - cp10) * (s1 - cp11) - (cp21 - cp11) * (s0 - cp10)

        for e0, e1 in inputList:
            e_in = (cp20 - cp10) * (e1 - cp11) - (cp21 - cp11) * (e0 - cp10)
            if e_in > 0:
                if s_in <= 0:
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection1(cp10, cp11, cp20, cp21, s0, s1, e0, e1))
                outputList.append((e0, e1))
            elif s_in > 0:
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection1(cp10, cp11, cp20, cp21, s0, s1, e0, e1))
            # The current vertex becomes the next segment's start point.
            s0, s1, s_in = e0, e1, e_in

        if len(outputList) < 1:
            return []
        cp10, cp11 = cp20, cp21

    return outputList
def clip1(subjectPolygon, clipPolygon):
    """Python-callable wrapper: forward both polygons to ``cclip1`` and return its result."""
    clipped = cclip1(subjectPolygon, clipPolygon)
    return clipped
-a
带注释的html
仍显示相当多的黄色,但大多数计算都不需要Python。在compute
函数中,有一个Python检查0除数,Python调用构建返回元组。并且元组解包仍然调用Python。所以还有改进的余地。
在Python代码中使用numpy
没有任何好处。列表很小,列表元素访问速度更快。但是在cython
数组中,数组可能是typed memoryviews
和纯C代码的基石。
其他时间。
你的第二次编辑:
In [24]: timeit edit2.clip(np.array(clippy.sP,np.float32), np.array(clippy.cP,np
...: .float32))
1000 loops, best of 3: 228 µs per loop
@Matt's
boundingbox
In [25]: timeit clippy.polygon_clip(clippy.rp,clippy.cp,100,100,300,300)
1000 loops, best of 3: 208 µs per loop
我通过定义扩展类来清理代码:
cdef class Point:
cdef public double x, y
def __init__(self, x, y):
self.x = x
self.y = y
让我写下像:
s = inputList[-1]
s_in = insideP(s, cp1, cp2)
'cover'函数必须将元组列表转换为点列表和v.v。
sP = [Point(*x) for x in subjectPolygon]
对此有轻微的速度惩罚。