工作原理：

Question

我有一个n维网格点但是它上面有洞，我想得到一个缺少网格点的列表。但是，我不想扩展现有网格的边界。

例如，在二维情况下，只有当某个缺失的网格点在上下两侧或左右两侧都存在已有的点时，我才需要它的坐标。下面是一张示意图，o 是现有的点，x 是我想要的坐标。

    o o o o o 
o o x o o x o
o x x o o
  o x o o
  o o o o

但数据不在网格中。它只是一个坐标列表，即

coords = [(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5), 
(1100,4.5), (1100,5.5), (1200,4.0), (1200,4.5), (1200,5.0), (1200,5.5), 
(1300,3.5), (1300,4.0), (1300,4.5)]

所以我想要的值是[(1100,3.5), (1100,4.0), (1100,5.0), (1200,3.5)]。

我尝试获取每个参数的最小值和最大值并创建一个新轴 numpy.arange(min(param1),max(param1),100)，然后通过 numpy.setdiff1d() 将其与旧值进行比较，但这会把网格补成一个矩形，而这并不是我想要的。

关于如何有效地做到这一点的任何想法？

Answer 1

速度方面我不确定，但这里有一个适用于 D 维的解法，它多少受到了现有评论和答案的启发。对于边长为 w 的方形点集，大约需要循环 D*w**(D-1) 次。它依次遍历每个维度，沿该维度做投影，然后对投影中的每一条网格线进行循环，并沿每条线查找缺失的点（相当于做一次 setdiff）。

import numpy as np

def grid_holes(coords):
    """Return the grid points missing between existing points along any axis.

    For every dimension the points are grouped into grid lines (points that
    agree in all other coordinates). Along each line, every position that is
    skipped between two consecutive points is reported as a hole. The grid
    step of a dimension is taken to be the smallest non-zero difference
    between consecutive points after sorting.

    Parameters
    ----------
    coords : array_like, shape (N, D)
        Coordinates of the existing grid points.

    Returns
    -------
    numpy.ndarray, shape (M, D)
        Coordinates of the holes. A hole enclosed along more than one axis
        appears once per axis, so rows may repeat. The array is empty, with
        shape (0, D), when there are no holes.
    """
    coords = np.atleast_2d(coords)
    N, D = coords.shape
    if N < 2:
        # A single point cannot enclose a gap, and the spacing estimate below
        # would otherwise reduce over an empty array.
        return np.empty((0, D), dtype=coords.dtype)

    coords = coords[np.lexsort(coords.T)]
    diff = np.diff(coords, axis=0)
    # Zero differences are replaced by inf so they never win the minimum.
    spacing = np.where(diff, np.abs(diff), np.inf).min(0)

    missing = []
    for d in range(D):
        projection = np.delete(coords, d, 1)
        # lexsort is stable, so points on the same grid line stay ordered
        # along dimension d. With D == 1 there are no keys to sort by and
        # all points already form one sorted line.
        order = np.lexsort(projection.T) if D > 1 else np.arange(N)
        breaks = np.diff(projection[order], axis=0).any(1).nonzero()[0] + 1
        for gridline in np.split(coords[order], breaks):
            x = gridline[:, d]
            s = spacing[d]
            # Integer grid index of every point along this line.
            i = np.round(x / s).astype(int)
            gaps = np.diff(i) - 1
            gap_locs = gaps.nonzero()[0]
            if not len(gap_locs):
                continue
            mx = [x[loc] + s * (g + 1)
                  for loc in gap_locs
                  for g in range(gaps[loc])]
            # Copy the line's fixed coordinates, then overwrite dimension d.
            mcoords = np.repeat(gridline[:1], len(mx), 0)
            mcoords[:, d] = mx
            missing.append(mcoords)

    if not missing:
        # np.concatenate refuses an empty list.
        return np.empty((0, D), dtype=coords.dtype)
    return np.concatenate(missing)

测试它的功能：

def _unique_rows(points, D):
    """Return the distinct rows of *points* as a sorted float (M, D) array."""
    rows = np.asarray(points, dtype=float).reshape(-1, D)
    return np.unique(rows, axis=0)


def test_grid_holes(coords, known_holes=None, func=grid_holes):
    """Run a hole finder on given or random coordinates and plot the result.

    Parameters
    ----------
    coords : array_like of shape (N, D), or tuple (N, D)
        Either the existing grid points, or a 2-tuple ``(N, D)`` asking for
        ``N`` random integer points in ``D`` dimensions.
    known_holes : array_like of shape (M, D), optional
        Expected holes. When given, they are plotted in red and compared
        with the holes that were found.
    func : callable
        Hole finder taking an (N, D) array and returning an (M, D) array.

    Returns
    -------
    bool or tuple
        With ``known_holes``: whether the de-duplicated found holes match
        the de-duplicated known holes. Otherwise a tuple holding the random
        coordinates (only if they were generated here) followed by the
        de-duplicated found holes.
    """
    ret = ()
    if isinstance(coords, tuple) and len(coords) == 2:
        # Generate random coords
        N, D = coords
        coords = np.random.randint(0, int(N**(1./D)), coords)
        ret += (coords,)
    else:
        coords = np.atleast_2d(coords)
        N, D = coords.shape

    # De-duplicate by value. Reinterpreting the buffer as 'f8' fields would
    # turn integer results into garbage floats.
    found_holes = _unique_rows(func(coords), D)
    ret += (found_holes,)

    if known_holes is not None:
        # Convert up front so the comparison also works when nothing is plotted.
        known_holes = np.atleast_2d(known_holes)

    # Only 2-D and 3-D data can be drawn.
    if D in (2, 3):
        import matplotlib.pyplot as plt
        fig = plt.figure()
        if D == 2:
            ax = fig.add_subplot(111)
        else:
            from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection on older matplotlib
            ax = fig.add_subplot(111, projection='3d')
        if known_holes is not None:
            ax.scatter(*known_holes.T, c='r', marker='o')
        ax.scatter(*coords.T, c='k', marker='o')
        ax.scatter(*found_holes.T, c='k', marker='x')

    if known_holes is not None:
        known_holes = _unique_rows(known_holes, D)
        # A different number of holes is a mismatch, not a broadcasting error.
        return (found_holes.shape == known_holes.shape
                and bool(np.allclose(found_holes, known_holes)))
    else:
        return ret

在这里，我们可以测试您的数据和生成的数据：

# Sample data from the question, grouped by first coordinate: the existing
# grid points and the holes that are expected to be reported for them.
coords = [
    (1000, 3.5), (1000, 4.0), (1000, 4.5), (1000, 5.0), (1000, 5.5),
    (1100, 4.5), (1100, 5.5),
    (1200, 4.0), (1200, 4.5), (1200, 5.0), (1200, 5.5),
    (1300, 3.5), (1300, 4.0), (1300, 4.5),
]
holes = [(1100, 3.5), (1100, 4.0), (1100, 5.0), (1200, 3.5)]

# Compare the finder against the known answer.
test_grid_holes(coords, holes)

# Exercise it on 100 random points in 3 dimensions.
test_grid_holes((100, 3))

Answer 2

我认为最简单的方法是将网格映射到矩形阵列。因为那时确定哪些点落在标准内是相对简单和快速的。缺点是RAM使用可能最终成为一个问题，特别是对于稀疏网格。

仍然有争议的一点是如何定义网格。其他答案当前使用沿元素之间的维度的最小差异作为该方向上的网格的步长。然而，这在极少数情况下会产生问题。例如。如果已知坐标是：

2, 4, 6, 9, 11

那么步长将等于 2，但这显然在 9 处出错了。也许更好的做法是取相邻差值的最大公约数？（例如借助另一个答案中的方法。）在我的代码中，我采取了不同的方法：只使用已知坐标中实际出现的“刻度”（ticks）来构造网格。

对于2D情况，类似下面的内容就足够了：

def find_holes_2d(coords):
    """Find unoccupied 2-D grid points enclosed by known points along x or y.

    The grid ticks are the distinct x and y values present in *coords*. A
    free grid point counts as a hole when known points exist on both sides
    of it along the first axis, or on both sides along the second axis.

    Parameters
    ----------
    coords : array_like, shape (N, 2)
        Known point coordinates.

    Returns
    -------
    numpy.ndarray, shape (M, 2)
        Coordinates of the holes, ordered by x tick and then by y tick.
    """
    pts = np.asanyarray(coords)

    # Replace each coordinate by the index of its tick on that axis.
    xs, ix = np.unique(pts[:, 0], return_inverse=True)
    ys, iy = np.unique(pts[:, 1], return_inverse=True)

    # Boolean occupancy map over the tick grid.
    occupied = np.zeros((xs.size, ys.size), dtype=bool)
    occupied[ix, iy] = True

    # A cell is spanned along an axis when an occupied cell lies at or before
    # it and another lies at or after it on that axis.
    spanned = np.zeros_like(occupied)
    for axis in (0, 1):
        seen_before = np.logical_or.accumulate(occupied, axis=axis)
        seen_after = np.flip(
            np.logical_or.accumulate(np.flip(occupied, axis), axis=axis), axis)
        spanned |= seen_before & seen_after

    # Map the free, spanned cells back to real coordinates.
    rows, cols = np.nonzero(spanned & ~occupied)
    return np.column_stack((xs[rows], ys[cols]))

相同的方法可以应用于ND案例，例如：

def find_holes(coords):
    """Find unoccupied N-D grid points enclosed by known points along any axis.

    The grid ticks of each dimension are the distinct values present in that
    column of *coords*. A free grid point counts as a hole when, along at
    least one axis, known points exist on both sides of it.

    Parameters
    ----------
    coords : array_like, shape (N, D)
        Known point coordinates.

    Returns
    -------
    numpy.ndarray, shape (M, D)
        Coordinates of the holes, in row-major order of the tick grid.
    """
    coords = np.asanyarray(coords)

    # Per dimension: the sorted ticks and each point's tick index.
    uniq, labels = zip(*[np.unique(c, return_inverse=True) for c in coords.T])

    # np.zeros needs a real sequence as shape; a lazy map object is rejected.
    grid = np.zeros([len(u) for u in uniq], bool)
    # A tuple of index arrays addresses one grid cell per point.
    grid[tuple(labels)] = True

    candidates = np.zeros_like(grid)
    for dim in range(grid.ndim):
        # Bring the current dimension to the front so the scans run on axis 0.
        along = np.moveaxis(grid, dim, 0)
        inside = (np.logical_or.accumulate(along, axis=0)
                  & np.logical_or.accumulate(along[::-1], axis=0)[::-1])
        # Undo the axis move before merging into the result mask.
        candidates |= np.moveaxis(inside, 0, dim)
    holes = candidates & ~grid

    hole_labels = np.where(holes)

    return np.column_stack([u[h] for u, h in zip(uniq, hole_labels)])

最后，这个玩具示例显示了剩下的一个问题：

o x o o
x   x o
o o o o

这里仍有一个洞“未被发现”。把已找到的孔的坐标（各个 x）添加到原始坐标中并再运行一次迭代，就可以轻松解决这个问题。

Answer 3

这是您的示例的解决方案。但是，我不认为这可以很容易地推广到n维。

工作原理：

从行中的孔开始。将顶点列表转换为数组并使用词典排序对行进行排序。

import numpy as np
import matplotlib.pyplot as plt

# Example points, one row per (x, y) pair; the duplicated (1200, 5.5) entry
# is kept exactly as in the example data.
coords = np.array([
    (1000, 3.5), (1000, 4.0), (1000, 4.5), (1000, 5.0), (1000, 5.5),
    (1100, 4.5), (1100, 6.5),
    (1200, 4.0), (1200, 5.5), (1200, 7.0), (1200, 5.5),
    (1300, 3.5), (1300, 4.0), (1300, 4.5), (1300, 5.5),
    (1700, 5.0),
])

# Order the rows by x first and by y within equal x. lexsort treats its last
# key as the primary one, hence the reversed columns.
coords = coords[np.lexsort(coords[:, ::-1].T)]

将网格大小确定为不为零的顶点之间的最小差异。

# Grid step per axis: the smallest strictly positive difference between
# consecutive rows of the sorted point list.
diffs = np.diff(coords, axis=0)
dx = diffs[:, 0][diffs[:, 0] > 0.0].min()
dy = diffs[:, 1][diffs[:, 1] > 0.0].min()

网格包含x坐标没有变化的洞，y坐标的变化大于dy。

# Marks row i when rows i and i+1 share the same x but are more than one
# y step apart, i.e. there is a gap between them.
indices = (diffs[:, 0] == 0.0) & (diffs[:, 1] > dy)

使用索引将孔扩展为缺失网格点列表，以提取起点和孔的长度。最后，连接进入numpy.array或者如果没有洞则返回空数组。

# Expand every gap into its missing points: start one step above the lower
# neighbour (x, y) and stop before the upper neighbour at y + Dy.
hole_list = [
    np.asarray([[x, y] for y in np.arange(y + dy, y + Dy, dy)]).reshape(-1, 2)
    for ((x, y), Dy) in zip(coords[indices, :], diffs[indices, 1])
]

# Keep the (n, 2) shape even when nothing was found, so that appending along
# axis 0 and column indexing with [:, 0] / [:, 1] still work.
if hole_list:
    holes_x = np.concatenate(hole_list)
else:
    holes_x = np.empty((0, 2))

现在将找到的孔添加到网格中并在列中查找孔。只需要切换字典顺序的顺序并在行中添加孔以避免找到它们两次。

# Holes in columns.
coords_x = np.append( coords, holes_x, axis = 0 )
coords_x = coords[ np.lexsort( ( coords[:,0], coords[:,1] ) ), : ]
diffs = np.diff( coords_x, axis = 0 )

indices = ( diffs[:,1] == 0.0 ) * ( diffs[:,0] > dx )
hole_list = [ np.asarray( [ [x, y] for x in np.arange( x + dx, x + Dx, dx )] )
                            for ((x, y), Dx) in zip ( coords_x[indices,:],
                                                      diffs[indices,0] ) ]
if len( hole_list ) > 0:
    holes_y = np.concatenate( hole_list )
else:
    holes_y = np.asarray( [] )

实施例

import numpy as np
import matplotlib.pyplot as plt

# Example points, one row per (x, y) pair.
coords = np.asarray(
    [(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
     (1100,4.5), (1100, 6.5), (1200,4.0), (1200,5.5), (1200,7.0), (1200,5.5),
     (1300,3.5), (1300,4.0), (1300,4.5), (1300, 5.5), (1700,5.0) ])

# Sort by x first, then by y (lexsort treats its last key as the primary one).
coords = coords[np.lexsort((coords[:, 1], coords[:, 0])), :]

# Find x and y grid sizes: the smallest strictly positive difference between
# consecutive rows of the sorted point list.
diffs = np.diff(coords, axis=0)
dx = np.min(diffs[diffs[:, 0] > 0.0, 0])
dy = np.min(diffs[diffs[:, 1] > 0.0, 1])

# Holes in rows: same x on both rows and more than one y step apart.
indices = (diffs[:, 0] == 0.0) * (diffs[:, 1] > dy)
hole_list = [
    np.asarray([[x, y] for y in np.arange(y + dy, y + Dy, dy)]).reshape(-1, 2)
    for ((x, y), Dy) in zip(coords[indices, :], diffs[indices, 1])
]

# Keep the (n, 2) shape even when nothing was found, so the append and the
# plotting below still work.
if hole_list:
    holes_x = np.concatenate(hole_list)
else:
    holes_x = np.empty((0, 2))


# Holes in columns.
# Add the row holes to the point set first, so a gap that was already filled
# is not reported a second time.
coords_x = np.append(coords, holes_x, axis=0)
# Sort the merged set (not the original coords) by y first, then by x.
coords_x = coords_x[np.lexsort((coords_x[:, 0], coords_x[:, 1])), :]
diffs = np.diff(coords_x, axis=0)

# Same y on both rows and more than one x step apart: a gap along x.
indices = (diffs[:, 1] == 0.0) * (diffs[:, 0] > dx)
hole_list = [
    np.asarray([[x, y] for x in np.arange(x + dx, x + Dx, dx)]).reshape(-1, 2)
    for ((x, y), Dx) in zip(coords_x[indices, :], diffs[indices, 0])
]
if hole_list:
    holes_y = np.concatenate(hole_list)
else:
    holes_y = np.empty((0, 2))

# Plot holes: known points in green, row holes in red, column holes in blue.
f = plt.figure()
ax = f.add_subplot(111)
ax.scatter(coords[:, 0], coords[:, 1], c='g', s=200)
ax.scatter(holes_x[:, 0], holes_x[:, 1], c='r', s=50)
ax.scatter(holes_y[:, 0], holes_y[:, 1], c='b', s=50)

Python：完整的非对称网格

3 个答案:

工作原理：

实施例