我正在尝试修改numpy implementation of the Ramer–Douglas–Peucker (RDP) algorithm,以返回过滤值的掩码,而不是带有值的过滤数组。
问题是如何递归返回正确的掩码。
答案 0 :(得分:1)
有助于正确理解算法。如果每个递归调用中第一个和最后一个段之间的所有点都在epsilon内,则应将这些点标记为False,而start和end应为True。这是我的解决方案:
import numpy as np
def pldist(x0, x1, x2):
return np.divide(np.linalg.norm(np.linalg.det([x2 - x1, x1 - x0])),
np.linalg.norm(x2 - x1))
def rdp_index(M, epsilon=0, dist=pldist):
dmax = 0.0
index = -1
for i in xrange(1, M.shape[0]):
d = dist(M[i], M[0], M[-1])
if d > dmax:
index = i
dmax = d
if dmax > epsilon:
r1 = rdp_index(M[:index + 1], epsilon, dist)
r2 = rdp_index(M[index:], epsilon, dist)
return np.vstack((r1[:-1], r2))
else:
part_mask = np.empty_like(M, dtype = bool)
part_mask.fill(False)
part_mask[0] = True
part_mask[-1] = True
return part_mask
请注意,此实现使用递归,此解决方案会出现大型数据集https://stackoverflow.com/a/2401520/2082968的问题。即超过了最大递归调用量。下面是一个使用堆栈和while循环而不是递归调用的解决方案。另外,更有效地计算距离。
def dsquared_line_points(P1, P2, points):
'''
Calculate only squared distance, only needed for comparison
http://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
'''
xdiff = P2[0] - P1[0]
ydiff = P2[1] - P1[1]
nom = (
ydiff*points[:,0] - \
xdiff*points[:,1] + \
P2[0]*P1[1] - \
P2[1]*P1[0]
)**2
denom = ydiff**2 + xdiff**2
return np.divide(nom, denom)
def rdp_numpy(M, epsilon = 0):
# initiate mask array
# same amount of points
mask = np.empty(M.shape[0], dtype = bool)
# Assume all points are valid and falsify those which are found
mask.fill(True)
# The stack to select start and end index
stack = [(0 , M.shape[0]-1)]
while (len(stack) > 0):
# Pop the last item
(start, end) = stack.pop()
# nothing to calculate if no points in between
if end - start <= 1:
continue
# Calculate distance to points
P1 = M[start]
P2 = M[end]
points = M[start + 1:end]
dsq = dsquared_line_points(P1, P2, points)
mask_eps = dsq > epsilon**2
if mask_eps.any():
# max point outside eps
# Include index that was sliced out
# Also include the start index to get absolute index
# And not relative
mid = np.argmax(dsq) + 1 + start
stack.append((start, mid))
stack.append((mid, end))
else:
# Points in between are redundant
mask[start + 1:end] = False
return mask