Question

我在一个示例 ASCII 文件（example ASCII file）中有一组构成 2D 图像的点。我想估计这些点所覆盖的总面积。平面内有些地方没有任何点，因为这些区域已被遮罩（mask）掉。我认为估计该面积的可行做法是使用凹包（concave hull）或 alpha 形状（alpha shape）。我尝试了这种方法（this approach）来寻找合适的 alpha 值，然后估算面积。

from shapely.ops import cascaded_union, polygonize
import shapely.geometry as geometry
from scipy.spatial import Delaunay
import numpy as np
import pylab as pl
from descartes import PolygonPatch
from matplotlib.collections import LineCollection
def plot_polygon(polygon):
    """Draw *polygon* as a grey patch on a new 10x10 figure and return the figure.

    The axis limits are the polygon's bounding box widened by a fixed margin
    on every side.
    """
    fig = pl.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    pad = .3
    left, bottom, right, top = polygon.bounds
    ax.set_xlim([left - pad, right + pad])
    ax.set_ylim([bottom - pad, top + pad])
    # Grey fill with a black outline; zorder=-1 is passed so the patch is
    # drawn below artists added later (e.g. the scatter of the points).
    ax.add_patch(
        PolygonPatch(polygon, fc='#999999', ec='#000000', fill=True, zorder=-1)
    )
    return fig
def alpha_shape(points, alpha):
    """Compute the alpha shape (concave hull) of a set of 2D points.

    Args:
        points: Sequence of shapely Points; only ``point.coords[0]`` is read.
        alpha: Positive filter parameter. A Delaunay triangle is kept only
            when its circumradius is smaller than ``1.0 / alpha``, so a
            larger alpha keeps fewer (smaller) triangles.

    Returns:
        A ``(shape, edge_points)`` tuple: the union of the kept triangles and
        the list of 2x2 coordinate arrays of their edges. With fewer than
        four points the convex hull and an empty edge list are returned.
    """
    if len(points) < 4:
        # With at most one triangle there is nothing to filter. A tuple is
        # returned so callers can unpack the result as in the general case.
        return geometry.MultiPoint(list(points)).convex_hull, []

    coords = np.array([point.coords[0] for point in points])
    tri = Delaunay(coords)
    edges = set()
    edge_points = []

    def add_edge(i, j):
        """Record the segment between points i and j unless already seen."""
        key = (i, j) if i < j else (j, i)
        if key in edges:
            return
        edges.add(key)
        edge_points.append(coords[[i, j]])

    max_radius = 1.0 / alpha
    # Each simplex row holds the indices of the three corners of a triangle.
    for ia, ib, ic in tri.simplices:
        pa = coords[ia]
        pb = coords[ib]
        pc = coords[ic]
        # Lengths of sides of triangle
        a = np.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2)
        b = np.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2)
        c = np.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2)
        # Semiperimeter of triangle
        s = (a + b + c)/2.0
        # Area of triangle by Heron's formula
        heron = s*(s-a)*(s-b)*(s-c)
        if heron <= 0:
            # Degenerate (collinear) triangle: the circumradius is undefined.
            continue
        area = np.sqrt(heron)
        circum_r = a*b*c/(4.0*area)
        # Here's the radius filter.
        if circum_r < max_radius:
            add_edge(ia, ib)
            add_edge(ib, ic)
            add_edge(ic, ia)

    # Build the shape once, after every triangle has been examined.
    m = geometry.MultiLineString(edge_points)
    triangles = list(polygonize(m))
    return cascaded_union(triangles), edge_points
# Read one point per line (whitespace-separated x and y in the first two
# columns) and echo it as it is parsed.
points = []
with open("test.asc") as f:
    for line in f:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines
        point = geometry.Point(float(fields[0]), float(fields[1]))
        points.append(point)
        print(point)
x = [p.x for p in points]
y = [p.y for p in points]

# Convex hull of all points, with the points drawn on top.
point_collection = geometry.MultiPoint(list(points))
convex_hull_polygon = point_collection.convex_hull
_ = plot_polygon(convex_hull_polygon)
_ = pl.plot(x,y,'o', color='#f16824')

# Alpha shape of the same points, plotted the same way.
concave_hull, edge_points = alpha_shape(points, alpha=0.001)
lines = LineCollection(edge_points)
_ = plot_polygon(concave_hull)
_ = pl.plot(x,y,'o', color='#f16824')

我得到了这个结果，但我希望这个方法能够检测出中间的洞。

更新
这就是我的真实数据的样子：

我的问题是：估算上述形状面积的最佳方法是什么？我也弄不清楚这段代码为什么不能正常工作。任何帮助都将不胜感激。

Answer 1

这是一个想法：使用k-means clustering。

您可以在Python中完成此操作，如下所示：

from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

# Load the point cloud; the first two columns are used as x and y.
dat     = np.loadtxt('test.asc')
xycoors = dat[:,0:2]

# Split the points into two clusters.
fit = KMeans(n_clusters=2).fit(xycoors)

# Colour every point by its cluster label.
plt.scatter(dat[:,0],dat[:,1], c=fit.labels_)
# Use the current axes: plt.axes() would add a new, empty axes on top of
# the scatter plot in current matplotlib releases.
plt.gca().set_aspect('equal', 'datalim')
plt.gray()
plt.show()

使用您的数据，会得到以下结果：

现在，您可以分别求出上方簇和下方簇的凸包，并分别计算它们的面积。把这两个面积相加，就得到它们并集面积的估计，同时巧妙地避开了中间的洞。

要微调您的结果，您可以使用群集的数量和算法的不同启动次数（算法是随机的，通常运行多次）。

例如，你问过使用两个簇是否总能把洞留在中间。我用以下代码做了实验：先生成均匀分布的点，然后切掉一个大小和方向随机的椭圆来模拟一个洞。

#!/usr/bin/env python3

import sklearn
import sklearn.cluster
import numpy as np
import matplotlib.pyplot as plt

# Subplot grid dimensions used by ShowManyClusters; PWIDTH*PHEIGHT point
# clouds are generated and plotted, one per panel.
PWIDTH  = 6  # passed as the first argument of plt.subplots
PHEIGHT = 6  # passed as the second argument of plt.subplots

def GetPoints(num):
  """Return a (num, 2) array of points drawn uniformly from [-150, 150) on each axis."""
  unit_square = np.random.rand(num, 2)
  # Scale [0, 1) to a 300-wide range, then shift so it is centered about zero.
  return unit_square * 300 - 150

def MakeHole(pts): #Chop out a randomly orientated and sized ellipse
  """Return the rows of *pts* lying strictly outside a random ellipse.

  The ellipse's two semi-axes, its center and its rotation angle are drawn
  from the global NumPy random state (in that order), so seeding
  ``np.random`` makes the result reproducible.

  Args:
    pts: array whose first two columns are x and y coordinates.

  Returns:
    A new array holding the surviving rows of *pts*, in their original order.
  """
  a = np.random.uniform(10,150)    #Semi-axis along the rotated x direction
  b = np.random.uniform(10,150)    #Semi-axis along the rotated y direction
  h = np.random.uniform(-150,150)  #X-center
  k = np.random.uniform(-150,150)  #Y-center
  A = np.random.uniform(0,2*np.pi) #Angle of rotation
  dx = pts[:,0]-h
  dy = pts[:,1]-k
  #Ellipse equation evaluated for all points at once; values >1 are outside
  outside = (dx*np.cos(A)+dy*np.sin(A))**2/a/a+(dx*np.sin(A)-dy*np.cos(A))**2/b/b>1
  return pts[outside]

def ShowManyClusters(pts,fitter,clusters,title,outdir='/z/'):
  """Cluster each point cloud, plot them in a grid and save the figure.

  Args:
    pts:      sequence of at least PWIDTH*PHEIGHT (n, 2) point arrays.
    fitter:   callable ``fitter(points, clusters)`` returning one integer
              label per point.
    clusters: number of clusters handed to *fitter*.
    title:    figure title; also used as the output file name.
    outdir:   prefix prepended to ``title + '.png'`` to form the output path.
  """
  colors  = np.array(list('bgrcmykbgrcmykbgrcmykbgrcmyk'))
  #squeeze=False keeps axs a 2D array even for a 1x1 grid
  fig,axs = plt.subplots(PWIDTH,PHEIGHT,squeeze=False)
  try:
    for i,ax in enumerate(axs.ravel()):
      lbls = fitter(pts[i],clusters)
      ax.scatter(pts[i][:,0],pts[i][:,1], c=colors[lbls])
      ax.get_xaxis().set_ticks([])
      ax.get_yaxis().set_ticks([])
    fig.suptitle(title)
    #plt.show()
    fig.savefig(outdir+title+'.png')
  finally:
    plt.close(fig) #Release the figure; one is created per call

# Display name -> callable taking (points, clusters) and returning the label
# of every point. The AffinityPropagation entry does not use `clusters`.
fitters = {
  'SpectralClustering': lambda x,clusters: sklearn.cluster.SpectralClustering(
    n_clusters=clusters,affinity='nearest_neighbors').fit(x).labels_,
  'KMeans': lambda x,clusters: sklearn.cluster.KMeans(
    n_clusters=clusters).fit(x).labels_,
  'AffinityPropagation': lambda x,clusters: sklearn.cluster.AffinityPropagation(
    ).fit(x).labels_,
}

# One random point cloud with a random elliptical hole per subplot panel.
np.random.seed(1)
pts = [MakeHole(GetPoints(300)) for _ in range(PWIDTH*PHEIGHT)]

# One saved figure per (algorithm, cluster count) pair; the global seed is
# reset before every run.
for name,fitter in fitters.items():
  for clusters in (2,3):
    np.random.seed(1)
    title = "{0}: {1} clusters".format(name,clusters)
    ShowManyClusters(pts,fitter,clusters,title)

考虑K-Means的结果：

至少在我看来，当“洞”将数据分成两个独立的blob时，似乎使用两个簇表现最差。（在这种情况下，当椭圆被定向使得它与包含样本点的矩形区域的两个边缘重叠时发生。）使用三个簇解决了大部分这些困难。

您还会注意到K-means在第1列，第3行以及第3列，第4行产生了一些反直觉的结果。回顾sklearn的聚类方法动态here，显示以下比较图像：

从图像中看，似乎SpectralClustering产生的结果与我们想要的结果一致。在上面的相同数据上尝试这个可以解决上面提到的问题（参见第1栏，第3行和第3列，第4行）。

上述内容表明，对于大多数此类情况，使用三个簇的谱聚类（spectral clustering）应该就足够了。

Answer 2

好的，思路是这样的。Delaunay 三角剖分会不加区分地产生很大的三角形；而且它只能产生三角形，这也是个问题。

因此，我们将生成您可能称之为“模糊Delaunay三角剖分”的东西。我们将所有点放入kd树中，对于每个点p，查看其k最近邻居。 kd-tree让这很快。

对于这 k 个邻居中的每一个，计算它到焦点 p 的距离，并用该距离生成权重。我们希望近处的点比远处的点更受青睐，因此指数函数 exp(-alpha*dist) 在这里是合适的。把这些权重归一化，得到一个概率分布，描述抽到每个邻居的概率。

现在，从该分布中抽取很多次。将经常选择附近的点，而不太经常选择更远的点。对于绘制的点，记下为焦点绘制的次数。结果是加权图，其中图中的每条边连接附近的点，并根据对的选择频率进行加权。

现在，从权重太小的图表中剔除所有边缘。这些是可能没有连接的点。结果如下：

现在，让我们把剩下的所有边都扔进shapely。然后我们可以通过缓冲它们将边缘转换为非常小的多边形。像这样：

用一个覆盖整个区域的大多边形减去这些细多边形（做差集），就得到类似三角剖分的一组多边形。这一步可能需要等一会儿。结果如下：

最后，剔除所有太大的多边形：

#!/usr/bin/env python

import numpy as np
import matplotlib.pyplot as plt
import random
import scipy
import scipy.spatial
import networkx as nx
import shapely
import shapely.geometry
import matplotlib

# Load the point cloud; only the first two columns (x, y) are used.
dat     = np.loadtxt('test.asc')
xycoors = dat[:, :2]
xcoors, ycoors = xycoors[:,0], xycoors[:,1] #Convenience aliases
npts    = dat.shape[0] #Number of points

# Short alias for the Euclidean distance function.
dist = scipy.spatial.distance.euclidean

def GetGraph(xycoors, alpha=0.0035):
  """Build a weighted neighbour graph by random sampling of near neighbours.

  For every point, its 9 nearest neighbours are weighted by
  ``exp(-alpha*distance)`` and 50 neighbours are drawn from that
  distribution (global NumPy random state). The weight of an edge is the
  number of times the pair was drawn, from either endpoint.

  Args:
    xycoors: (n, 2) array with one point per row.
    alpha:   decay rate of the exponential distance weighting.

  Returns:
    An ``nx.Graph`` with nodes ``0..n-1`` and integer ``'weight'`` edge
    attributes.
  """
  kdt  = scipy.spatial.KDTree(xycoors)         #Build kd-tree for quick neighbor lookups
  G    = nx.Graph()
  npts = xycoors.shape[0]                      #One point per row
  for x in range(npts):
    G.add_node(x)
    nbr_dist, nbr_idx = kdt.query(xycoors[x,:], k=10) #10 nearest points; the first is the query point itself
    nbr_dist = nbr_dist[1:]                    #Drop central point
    nbr_idx  = nbr_idx[1:]                     #Drop central point
    found    = np.isfinite(nbr_dist)           #With <10 points the query pads results with inf distances
    nbr_dist = nbr_dist[found]
    nbr_idx  = nbr_idx[found]
    if nbr_idx.size == 0:                      #Isolated point: nothing to connect
      continue
    pq       = np.exp(-alpha*nbr_dist)         #Exponential weighting of nearby points
    pq       = pq/np.sum(pq)                   #Convert to a PDF
    choices  = np.random.choice(nbr_idx, p=pq, size=50) #Choose neighbors based on PDF
    for c in choices:                          #Insert neighbors into graph
      c = int(c)                               #Plain int node ids, matching add_node(x)
      if G.has_edge(x, c):                     #Already seen neighbor
        G[x][c]['weight'] += 1                 #Strengthen connection
      else:
        G.add_edge(x, c, weight=1)             #New neighbor; build connection
  return G

def PruneGraph(G,cutoff):
  """Return a copy of *G* without the edges whose weight is below *cutoff*.

  Args:
    G:      graph whose edges carry a ``'weight'`` attribute.
    cutoff: edges with ``weight < cutoff`` are removed.

  Returns:
    A new graph; *G* itself is left untouched.
  """
  newg = G.copy()
  #edges() reports every edge once, so no removal can hit a missing edge
  weak_edges = [(u,v) for u,v,attrs in newg.edges(data=True) if attrs['weight']<cutoff]
  newg.remove_edges_from(weak_edges)
  return newg


def PlotGraph(xycoors,G,cutoff=6):
  """Plot the points and the edges of *G* that survive pruning at *cutoff*.

  Args:
    xycoors: (n, 2) array of point coordinates; node ``i`` of *G* is row ``i``.
    G:       weighted graph, pruned with ``PruneGraph(G, cutoff)`` before drawing.
    cutoff:  minimum edge weight to draw.
  """
  xcoors = xycoors[:,0]
  ycoors = xycoors[:,1]
  G = PruneGraph(G,cutoff)
  plt.plot(xcoors, ycoors, "o")
  #Iterate the graph's own edges (each once) rather than a global point count
  for u,v in G.edges():
    plt.plot((xcoors[u],xcoors[v]),(ycoors[u],ycoors[v]), 'k-', lw=1)
  plt.show()


def GetPolys(xycoors,G):
  """Cut the bounding box of the points along the edges of *G*.

  Every edge is turned into a line segment, the segments are buffered into
  narrow polygons, and those are subtracted from the bounding box.

  Args:
    xycoors: (n, 2) array of point coordinates; node ``i`` of *G* is row ``i``.
    G:       graph whose edges are used as cut lines.

  Returns:
    The shapely geometry left after the subtraction.
  """
  xcoors = xycoors[:,0]
  ycoors = xycoors[:,1]
  #Get lines connecting all points in the graph (each edge once)
  lines = [((xcoors[u],ycoors[u]),(xcoors[v],ycoors[v])) for u,v in G.edges()]
  #Get bounds of region
  xmin  = np.min(xcoors)
  xmax  = np.max(xcoors)
  ymin  = np.min(ycoors)
  ymax  = np.max(ycoors)
  mls   = shapely.geometry.MultiLineString(lines)   #Bundle the lines
  mlsb  = mls.buffer(2)                             #Turn lines into narrow polygons
  bbox  = shapely.geometry.box(xmin,ymin,xmax,ymax) #Generate background polygon
  polys = bbox.difference(mlsb)                     #Subtract to generate polygons
  return polys

def PlotPolys(polys,area_cutoff):
  """Draw every polygon with an area below *area_cutoff* in a random colour.

  Args:
    polys:       a shapely multi-part geometry, a single polygon, or any
                 iterable of polygons.
    area_cutoff: polygons with ``area >= area_cutoff`` are skipped.
  """
  #Multi-part geometries expose their parts via .geoms; a lone polygon is
  #wrapped in a list; anything else is assumed to be an iterable of polygons
  geoms = getattr(polys, 'geoms', None)
  if geoms is None:
    geoms = [polys] if hasattr(polys, 'exterior') else polys
  fig, ax = plt.subplots(figsize=(8, 8))
  for polygon in geoms:
    if polygon.area<area_cutoff:
      outline  = np.asarray(polygon.exterior.coords)
      mpl_poly = matplotlib.patches.Polygon(outline, alpha=0.4, facecolor=np.random.rand(3))
      ax.add_patch(mpl_poly)
  ax.autoscale()
  fig.show()


#Functional stuff starts here

G = GetGraph(xycoors, alpha=0.0035)

#Choose a value that rips off an appropriate amount of the left side of this histogram
weights = sorted(v['weight'] for x in G for v in G[x].values())
plt.hist(weights, bins=20)
plt.show()

PlotGraph(xycoors,G,cutoff=6)       #Plot the graph to ensure our cut-offs were okay. May take a while
prunedg = PruneGraph(G,cutoff=6)    #Prune the graph
polys   = GetPolys(xycoors,prunedg) #Get polygons from graph

#The difference may be a single polygon or a multi-part geometry; collect
#the individual polygons in a plain list so they can be iterated either way
poly_list = list(getattr(polys, 'geoms', [polys]))

#Inspect the area distribution to pick area_cutoff
areas = sorted(p.area for p in poly_list)
plt.plot(areas)
plt.hist(areas,bins=20)
plt.show()

area_cutoff = 150000
PlotPolys(poly_list,area_cutoff=area_cutoff)
good_polys = [p for p in poly_list if p.area<area_cutoff]
total_area = sum(p.area for p in good_polys)

Answer 3

虽然你似乎打算使用凹包，但这里还有另一条速度非常快的路线，我觉得它会给你一个相当稳定的结果：

创建一个以影响半径（int radiusOfInfluence）为参数的函数。在函数内部以该半径运行体素滤波器（voxel filter）。然后把以该半径为半径的圆的面积（pi * radiusOfInfluence^2）乘以点云中剩余点的数量。这样应能得到相对稳健的面积估计，并且对孔洞和不规则的边缘有很好的适应性。

需要考虑的一些事项：

- 由于边缘超出正好一个半径，这将给你一个正面积超调。对此进行调整的修改可以是运行统计异常值去除滤波器（以反向模式）以获取统计边缘点。然后可以假设每个边缘点的大约一半位于形状之外，在乘以区域之前减去从总计数中找到的点数的一半。

- 影响半径很大程度上决定了这个功能的空洞检测，因为较大的一个将允许单点覆盖更大的区域，而且通过调整统计异常值滤波器上的标准截止，您可以更积极地检测内部打洞并以这种方式调整你的区域。

这确实引起了你所追求的问题，因为这更像是一个镜头准确度/镜头分组类型评估，假设一组合理分布的样本。您的方法有点假设您的外边缘点是可能的绝对限制（根据情况可能是公平的假设）

EDIT -----------------------

我没有时间写出示例代码，但我可以进一步解释以帮助理解。

这是voxel filter的核心。很简单，它在x，y坐标中设置种子点，然后在整个空间上创建一个网格，该网格在用户指定的过滤器半径的两个轴上都有单位（网格间距）。在每个网格框内，它将所有点平均为单个点。这对于这个概念非常重要，因为它几乎完全消除了重叠的问题。

第二部分（反stat outlier removal）只是有点聪明，可以收紧你的边缘。基本上，stat异常值用于通过查看从每个点到其（k）最近邻居的距离来消除噪声。在为每个点生成到k个最近邻居的平均距离之后，它建立直方图，并且用户定义的参数充当用于保持或移除点的二进制阈值。当倒置并设置为合理的截止值（〜0.75 std应该起作用）时，它将删除对象大部分中的所有点（即仅留下边缘点）。这一点很重要的原因在于技术上这些点超过了对象的边界1个半径。虽然有些是尖锐的，有些是钝角边缘（即大于或小于半圆的溢出），每个点取下1/2圆圈区域应该在整个物体上给你一个相当好的声音改善

请记住，虽然在一天结束时，这只会给你一个号码。就压力测试而言，我建议创建已知区域的人造点云，或者创建一个图形输出，显示您放置圆圈和半圆的位置（如果您喜欢，则朝向对象内部）。

您想要转向改进此方法的旋钮是：体素滤波器半径，每个点的影响区域（实际上可以与vox滤波器半径分开控制，尽管它们应该保持非常接近），std cutt-off。

希望这有助于澄清，欢呼！

Answer 4

编辑：

我注意到你有自己的代码来计算alpha形状， Delaunay三角形的区域就在那里，因此计算形状的区域更加容易......

如果要将三角形添加到alpha形状多边形，只需添加三角形区域。

如果要检测孔...添加辅助阈值以避免添加面积大于阈值的三角形。对于此示例，max_area = 99999的值将删除该孔。

唯一的问题是你创建图形输出的方式，因为你不会看到这个洞。

def alpha_shape(points, alpha, max_area):
    """Compute the alpha shape of 2D points together with its area.

    Args:
        points: Sequence of shapely Points; only ``point.coords[0]`` is read.
        alpha: Positive filter parameter. A Delaunay triangle is kept only
            when its circumradius is smaller than ``1.0 / alpha``.
        max_area: Secondary filter; triangles whose area is not smaller than
            this value are dropped as well (used to open up holes).

    Returns:
        A ``(shape, edge_points, total_area)`` tuple: the union of the kept
        triangles, the list of 2x2 coordinate arrays of their edges, and the
        summed area of the kept triangles. With fewer than four points the
        convex hull, an empty edge list and the hull's area are returned.
    """
    if len(points) < 4:
        # With at most one triangle there is nothing to filter. The same
        # 3-tuple shape is returned as in the general case.
        hull = geometry.MultiPoint(list(points)).convex_hull
        return hull, [], hull.area

    coords = np.array([point.coords[0] for point in points])
    tri = Delaunay(coords)
    total_area = 0
    edges = set()
    edge_points = []

    def add_edge(i, j):
        """Record the segment between points i and j unless already seen."""
        key = (i, j) if i < j else (j, i)
        if key in edges:
            return
        edges.add(key)
        edge_points.append(coords[[i, j]])

    max_radius = 1.0 / alpha
    # Each simplex row holds the indices of the three corners of a triangle.
    for ia, ib, ic in tri.simplices:
        pa = coords[ia]
        pb = coords[ib]
        pc = coords[ic]
        # Lengths of sides of triangle
        a = np.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2)
        b = np.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2)
        c = np.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2)
        # Semiperimeter of triangle
        s = (a + b + c)/2.0
        # Area of triangle by Heron's formula
        heron = s*(s-a)*(s-b)*(s-c)
        if heron <= 0:
            # Degenerate (collinear) triangle: the circumradius is undefined.
            continue
        area = np.sqrt(heron)
        circum_r = a*b*c/(4.0*area)
        # Radius filter plus the secondary area filter.
        if circum_r < max_radius and area < max_area:
            add_edge(ia, ib)
            add_edge(ib, ic)
            add_edge(ic, ia)
            total_area += area

    m = geometry.MultiLineString(edge_points)
    triangles = list(polygonize(m))
    return cascaded_union(triangles), edge_points, total_area

旧答案：

要计算不规则简单多边形的面积，可以使用Shoelace formula和边界的CCW坐标作为输入。

如果要检测点云内部的孔，则必须删除外接圆半径（circumradius）大于某个次级阈值的 Delaunay 三角形。理想的做法是：先计算 Delaunay 三角剖分，并用当前的 alpha 形状进行过滤；然后计算每个三角形的外接圆半径，并去除那些外接圆半径比平均值大得多的三角形。

要计算带孔的不规则多边形的面积，请为每个孔边界使用Shoelace公式。以CCW（正）顺序输入外部边界以获得该区域。然后以CW（负）顺序输入每个孔的边界，以获得面积的（负）值。

估计由一组点（Alpha形状??）生成的图像区域

4 个答案: