我有一个点列表
# Sample input from the question: a list of 2-D points stored as
# (float, float) tuples. Points that lie close together are to be fused.
points = [(-57.213878612138828, 17.916958304169601),
(76.392039480378514, 0.060882542482108504),
(0.12417670682730897, 1.0417670682730924),
(-64.840321976787706, 21.374279296143762),
(-48.966302937359913, 81.336323778066188),
(11.122014925372399, 85.001119402984656),
(8.6383049769438465, 84.874829066623917),
(-57.349835526315836, 16.683634868421084),
(83.051530302006697, 97.450469562867383),
(8.5405200433369473, 83.566955579631625),
(81.620435769843965, 48.106831247886376),
(78.713027357450656, 19.547209139192304),
(82.926153287322933, 81.026080639302577)]
当用红色绘制时它们是这样的:
我现在想要融合彼此靠近的点(图中以黑色圈出)。所谓“融合”,是指用一个坐标为这些点坐标平均值的点来替换它们。
我知道有一整套聚类技术可以完成类似的工作。但是可以看出,只要我能够调整距离阈值,这就是一项很简单的任务,所以我不想使用任何聚类技术,只需要一个简单的解决方案即可。
我正在使用Python,如果它有帮助。
所谓“靠近”,是指它们之间的欧氏距离小于某个可以由我自行调整的阈值。因此右上角的两个点不会被圈在一起。
答案 0 :(得分:7)
你可以写一个函数:给定距离 d,它会把与某个点的距离小于 d 的所有点融合在一起(取它们的平均值):
def dist2(p1, p2):
    """Return the squared Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and 1 of each point are used, and no
    square root is taken, so the result must be compared against a squared
    distance.

    Args:
        p1: First point, indexable as ``p1[0]``, ``p1[1]``.
        p2: Second point, indexable as ``p2[0]``, ``p2[1]``.

    Returns:
        ``(p1[0] - p2[0])**2 + (p1[1] - p2[1])**2``.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return dx**2 + dy**2
def fuse(points, d):
    """Fuse points that lie within distance ``d`` of a seed point.

    The points are scanned in order. Every point that has not yet been
    absorbed becomes the seed of a new group; all later, not yet absorbed
    points whose distance to the seed is strictly less than ``d`` join that
    group. Each group is replaced by the mean of its members.

    Args:
        points: Sequence of 2-D points (indexable as ``p[0]``, ``p[1]``).
        d: Distance threshold; compared in squared form via ``dist2``.

    Returns:
        A list of ``(x, y)`` tuples, one averaged point per group, in the
        order in which the group seeds appear in ``points``.
    """
    ret = []
    d2 = d * d  # compare squared distances to avoid taking square roots
    n = len(points)
    taken = [False] * n
    for i in range(n):
        if taken[i]:
            continue
        taken[i] = True
        seed = points[i]
        sum_x, sum_y = seed[0], seed[1]
        count = 1
        for j in range(i + 1, n):
            # Skip points already absorbed by an earlier group; otherwise a
            # single point would contribute to more than one average.
            if taken[j]:
                continue
            if dist2(seed, points[j]) < d2:
                sum_x += points[j][0]
                sum_y += points[j][1]
                count += 1
                taken[j] = True
        # float() keeps the mean exact for integer input on Python 2 as well.
        divisor = float(count)
        ret.append((sum_x / divisor, sum_y / divisor))
    return ret
答案 1 :(得分:3)
你可以给出半径限制并迭代地连接比那个半径更近的点。如果您的数据集足够小,那么蛮力就足够了:
def join_pair(points, r):
    """Merge the first pair of points closer than ``r``, modifying the list.

    Pairs are examined in the order produced by ``itertools.combinations``.
    The first pair whose Euclidean distance is strictly less than ``r`` is
    removed from ``points`` and its midpoint is appended to the end.

    Note that calling this repeatedly merges pairwise, so a point produced
    by an earlier merge counts as a single point in the next midpoint; the
    final position is therefore not necessarily the plain mean of all the
    original points that ended up merged together.

    Args:
        points: Mutable list of 2-D points; modified in place.
        r: Distance threshold.

    Returns:
        True if a pair was merged, False if no pair is closer than ``r``.
    """
    # Imported locally so the snippet is self-contained.
    import itertools
    import math

    # combinations() snapshots the list up front, and we return right after
    # mutating it, so changing ``points`` inside the loop is safe.
    for p, q in itertools.combinations(points, 2):
        if math.hypot(p[0] - q[0], p[1] - q[1]) < r:
            points.remove(p)
            points.remove(q)
            # 2.0 avoids integer truncation for int coordinates on Python 2.
            points.append(((p[0] + q[0]) / 2.0, (p[1] + q[1]) / 2.0))
            return True
    return False
# Keep merging until join_pair reports that no pair is closer than R.
# join_pair modifies ``points`` in place, so the loop body has nothing to do.
# NOTE(review): R (the merge radius) is not defined in this snippet and must
# be assigned before this loop runs.
while join_pair(points, R):
    pass
答案 2 :(得分:0)
好的,下面是我对一个稍微复杂一些的算法所做的、完全未经优化的尝试:该算法先建立一个布尔邻近矩阵,再由它得到聚类列表,最后用聚类列表计算平均坐标:
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 16 08:42:50 2013
@author: Tobias Kienzler
"""
def squared_distance(p1, p2):
    """Return the squared Euclidean distance between ``p1`` and ``p2``.

    Coordinates are paired with ``zip``, so points of any dimension are
    accepted; if the points differ in length, the surplus coordinates of
    the longer one are ignored.

    Args:
        p1: Iterable of coordinates of the first point.
        p2: Iterable of coordinates of the second point.

    Returns:
        The sum of squared coordinate differences (0 for empty points).
    """
    # TODO optimization: with numpy.ndarrays this is ((p1 - p2)**2).sum()
    return sum((x - y) ** 2 for x, y in zip(p1, p2))
def get_proximity_matrix(points, threshold):
    """Build a symmetric boolean matrix marking which points are close.

    Entry ``[i][j]`` (for ``i != j``) is True when the squared distance
    between ``points[i]`` and ``points[j]`` is strictly less than
    ``threshold**2``. The diagonal is left as False.

    Args:
        points: Sequence of points accepted by ``squared_distance``.
        threshold: Distance threshold.

    Returns:
        An ``n x n`` list of lists of booleans, ``n = len(points)``.
    """
    n = len(points)
    t2 = threshold ** 2
    # TODO optimization: use sparse boolean matrix
    # Build each row separately so the rows do not alias one another.
    prox = [[False] * n for _ in range(n)]
    for i in range(n):
        for j in range(i + 1, n):
            close = squared_distance(points[i], points[j]) < t2
            prox[i][j] = close
            prox[j][i] = close  # symmetric matrix
    return prox
def find_clusters(points, threshold):
    """Group point indices into clusters of mutually reachable points.

    Two points are linked when the proximity matrix marks them as close;
    clusters are formed by chaining such links, so two points may share a
    cluster without being directly close to each other. A point with no
    close neighbour forms a cluster of its own.

    Args:
        points: Sequence of points.
        threshold: Distance threshold passed to ``get_proximity_matrix``.

    Returns:
        A list of clusters, each a list of indices into ``points``. Every
        index appears in exactly one cluster.
    """
    n = len(points)
    prox = get_proximity_matrix(points, threshold)
    # point_in_list[k] is the cluster list currently holding index k.
    point_in_list = [None] * n
    clusters = []
    for i in range(n):
        for j in range(i + 1, n):
            if not prox[i][j]:
                continue
            list1 = point_in_list[i]
            list2 = point_in_list[j]
            if list1 is None and list2 is None:
                # Neither point is clustered yet: start a new cluster.
                list_new = [i, j]
                point_in_list[i] = list_new
                point_in_list[j] = list_new
                clusters.append(list_new)
            elif list2 is None:
                list1.append(j)
                point_in_list[j] = list1
            elif list1 is None:
                list2.append(i)
                point_in_list[i] = list2
            elif list1 is not list2:
                # Merge list2 into list1. Every member of list2 must be
                # re-pointed at list1 (not only j); otherwise later links
                # through those members would touch a stale list that is
                # no longer part of ``clusters``.
                list1 += list2
                for index in list2:
                    point_in_list[index] = list1
                # Remove the absorbed list by identity, not by equality.
                clusters = [c for c in clusters if c is not list2]
            # else: both points are already in the same cluster
        if point_in_list[i] is None:
            # No link to any other point was found: isolated point.
            list_new = [i]
            point_in_list[i] = list_new
            clusters.append(list_new)
    return clusters
def average_clusters(points, threshold=1.0, clusters=None):
    """Replace each cluster of points by the mean of its members.

    Args:
        points: Sequence of points; all are expected to have the same
            number of coordinates as ``points[0]``.
        threshold: Distance threshold, used only when ``clusters`` is None.
        clusters: Optional list of clusters (lists of indices into
            ``points``). When None, it is computed with
            ``find_clusters(points, threshold)``.

    Returns:
        A list with one averaged point per cluster, each a list of
        coordinates, in the same order as ``clusters``.
    """
    if clusters is None:
        clusters = find_clusters(points, threshold)
    newpoints = []
    for cluster in clusters:
        # float() prevents integer truncation for int input on Python 2.
        n = float(len(cluster))
        point = [0] * len(points[0])  # TODO numpy
        for index in cluster:
            # in numpy: just point += part / n
            for j, coordinate in enumerate(points[index]):
                point[j] += coordinate / n  # TODO optimization: point/n later
        newpoints.append(point)
    return newpoints
# Demo: cluster the sample points from the question and plot the result.
points = [(-57.213878612138828, 17.916958304169601),
          (76.392039480378514, 0.060882542482108504),
          (0.12417670682730897, 1.0417670682730924),
          (-64.840321976787706, 21.374279296143762),
          (-48.966302937359913, 81.336323778066188),
          (11.122014925372399, 85.001119402984656),
          (8.6383049769438465, 84.874829066623917),
          (-57.349835526315836, 16.683634868421084),
          (83.051530302006697, 97.450469562867383),
          (8.5405200433369473, 83.566955579631625),
          (81.620435769843965, 48.106831247886376),
          (78.713027357450656, 19.547209139192304),
          (82.926153287322933, 81.026080639302577)]
threshold = 20.0
clusters = find_clusters(points, threshold)
clustered = average_clusters(points, clusters=clusters)
# Single-argument print calls produce the same text on Python 2 and 3.
print("clusters: %s" % (clusters,))
print(clustered)
import matplotlib.pyplot as plt
ax = plt.figure().add_subplot(1, 1, 1)
# Filled green circle around every averaged (cluster) point.
for cluster in clustered:
    ax.add_patch(plt.Circle(cluster, radius=threshold/2, color='g'))
# Black outline around every original point; the radius is half the
# threshold.
for point in points:
    ax.add_patch(plt.Circle(point, radius=threshold/2, edgecolor='k', facecolor='none'))
plt.plot(*zip(*points), marker='o', color='r', ls='')
plt.plot(*zip(*clustered), marker='.', color='g', ls='')
plt.axis('equal')
plt.show()
(为了便于可视化,圆的半径取阈值的一半,即如果两个点的圆相交或边缘相接,则这两个点属于同一个簇。)
答案 3 :(得分:0)
page-break-inside: avoid;