Question

有哪些算法（最好带代码）可以将大量的x-y点移动到网格上最近的点，而不允许多个点占据相同的位置？

假设我有50,000个红色或绿色点，每个点在连续空间中具有不同的（x，y）位置。我希望使用面向像素的显示器，使每个点占据800x800像素画布上的像素，这些点从原始位置移动尽可能少（例如，最小化平方位移距离）。

Keim's GridFit algorithm似乎是一种方法，但我无法在线找到实现，而且很久以前就已经发布了。是否有GridFit的实现可用？更好的是，是否有更新的技术使用位移来避免散点图上的重叠点（通常对于任意均匀尺寸的正方形/点）？

Answer 1

理论上，您可以使用最大权二分图匹配（maximum weighted bipartite matching）以最优方式解决此问题。但其运行时间是点数的三次方，对于如此大的n来说太慢了。

可能有更快的启发式方法，从与确切解决方案相同的公式开始，因此解释如何设置它可能是有用的：

设A是对应于输入点的一组顶点，B是对应于所有网格点的一组顶点；对于每对点（a，b），其中a在A中、b在B中，创建一条边（a，b），其权重等于a和b之间欧几里德距离的负值。然后把这个图交给匈牙利算法求解，它会告诉你每个输入点与哪个网格点（如果有的话）匹配。

Answer 2

目前，我已经用Python实现了一个GridFit版本。如果其他人想要使用，请随意——我很乐意以CC-Zero（CC0）协议发布它。该算法或许还有改进的余地，例如根据点的分布（而不是区域的纵横比）来决定何时垂直平分、何时水平平分。

import numpy as np

def bisect(points, indices, bottom_left, top_right):
    """Move points onto distinct pixel centres by recursive bisection ("Gridfit").

    Freely redistributable Python implementation by Yan Wong of the
    pixel-fitting "Gridfit" algorithm as described in: Keim, D. A. and
    Herrmann, A. (1998) The Gridfit algorithm: an efficient and effective
    approach to visualizing large amounts of spatial data. Proceedings of the
    Conference on Visualization '98, 181-188.

    The implementation here differs in 2 main respects from that in the paper.
    Firstly, areas are not always bisected in horizontal then vertical order;
    instead the longer side of the area is the one that gets cut (dimension 1
    when its extent is the larger, dimension 0 otherwise), and a square area
    is cut along the dimension that splits its points most evenly. Secondly, a
    single pass algorithm is used which produces greater consistency, in that
    the order of the points in the dataset does not determine the outcome
    (unless points have identical X or Y values). Details are described in
    comments within the code.

    Args:
        points: 2-column float NumPy array of coordinates. It is modified IN
            PLACE: rows listed in ``indices`` are rescaled during recursion
            and finally set to the centre (corner + 0.5) of a 1x1 cell.
        indices: NumPy integer array of the row numbers of ``points`` that
            belong to the current rectangle.
        bottom_left: length-2 NumPy array, the lower corner of the rectangle,
            in the same column order as ``points``.
        top_right: length-2 NumPy array, the upper corner of the rectangle.
            The corners are treated as pixel edges, so they are expected to
            hold whole numbers.

    Returns:
        None. The result is the in-place update of ``points``.

    Note:
        The caller is expected to pass no more points than the rectangle has
        pixels. If more than one point ends up in a single 1x1 cell the
        function prints "ERROR" and still places all of them on that cell.
    """
    if len(indices) == 0:
        return
    extent = top_right - bottom_left
    # Kept as a plain scalar (rather than the 1-element array np.diff() returns)
    # so that the int()/if conversions below do not rely on array-to-scalar coercion.
    width_minus_height = extent[1] - extent[0]
    if width_minus_height == 0:
        #bisect on the dimension which best splits up the points to each side of the midline
        evenness = np.abs(np.mean(points[indices] < (top_right+bottom_left)/2.0, axis=0)-0.5)
        dim = int(evenness[0] > evenness[1])
    else:
        dim = int(width_minus_height > 0) #if wider than tall, bisect on dim = 1
    minpix = bottom_left[dim]
    maxpix = top_right[dim]
    size = maxpix-minpix
    if size == 1: # the side being cut is never the shorter one, so this is a 1x1 cell: put the point in the middle of the pixel
        if len(indices) > 1:
            print("ERROR") #sanity check: remove for faster speed
        points[indices, :] = bottom_left+0.5
        return
    other_dim = top_right[1-dim] - bottom_left[1-dim]

    cutpoint_from = (maxpix+minpix)/2.0
    cutpoint_to = None
    lower_cut = int(np.floor(cutpoint_from))
    upper_cut = int(np.ceil(cutpoint_from))
    lower = points[indices, dim] < lower_cut
    upper = points[indices, dim] >= upper_cut
    lower_points = indices[lower]
    upper_points = indices[upper]

    if lower_cut!=upper_cut: # initial cutpoint falls between pixels. If cutpoint will not shift, we need to round it up or down to the nearest integer
        mid_points = indices[np.logical_and(~lower, ~upper)]
        # Would each half have room for its points if we cut at lower_cut / at upper_cut?
        low_cut_lower = len(lower_points) <= (lower_cut - minpix) * other_dim
        low_cut_upper = len(upper_points) + len(mid_points) <= (maxpix-lower_cut) * other_dim
        up_cut_lower = len(lower_points) + len(mid_points) <= (upper_cut-minpix) * other_dim
        up_cut_upper = len(upper_points) <= (maxpix-upper_cut) * other_dim
        low_cut_OK = (low_cut_lower and low_cut_upper)
        up_cut_OK = (up_cut_lower and up_cut_upper)

        if low_cut_OK and not up_cut_OK:
            cutpoint_from = lower_cut
            upper_points = np.append(upper_points, mid_points)
        elif up_cut_OK and not low_cut_OK:
            cutpoint_from = upper_cut
            lower_points = np.append(lower_points, mid_points)
        else:
            # Tie-break on which side of the midline the mean of the points lies
            lowmean = np.mean(points[indices, dim]) < cutpoint_from
            if low_cut_OK and up_cut_OK:
                if lowmean:
                    cutpoint_from = lower_cut
                    upper_points = np.append(upper_points, mid_points)
                else:
                    cutpoint_from = upper_cut
                    lower_points = np.append(lower_points, mid_points)
            else:
                #if neither low_cut_OK or up_cut_OK, we will end up shifting the cutpoint to an integer value anyway => no need to round up or down
                lower_points = indices[points[indices, dim] < cutpoint_from]
                upper_points = indices[points[indices, dim] >= cutpoint_from]
                if lowmean:
                    cutpoint_to = lower_cut
                else:
                    cutpoint_to = upper_cut
    else:
        if len(lower_points) > (cutpoint_from-minpix) * other_dim or len(upper_points) > (maxpix-cutpoint_from) * other_dim:
            top = maxpix - len(upper_points) * 1.0 / other_dim
            bot = minpix + len(lower_points) * 1.0 / other_dim
            if len(lower_points) > len(upper_points):
                cutpoint_to = int(np.floor(bot))  #shift so that the area with most points shifted as little as poss
                #cutpoint_to = int(np.floor(top))  #alternative shift giving area with most points max to play with: seems to give worse results

            elif len(lower_points) < len(upper_points):
                cutpoint_to = int(np.ceil(top))  #shift so that the area with most points shifted as little as poss
                #cutpoint_to = int(np.ceil(bot))  #alternative shift giving area with most points max to play with: seems to give worse results


    if cutpoint_to is None:
        cutpoint_to = cutpoint_from
    else:
        # As identified in the Gridfit paper, we may still not be able to fit points into the space, if they fall on the dividing line, e.g.
        # imagine 9 pixels (3x3), with 5 points on one side of the (integer) cut line and 4 on the other. For consistency, and to avoid 2 passes
        # we simply pick a different initial cutoff line, so that one or more points are shifted between the initial lower and upper regions
        #
        # At the same time we can deal with cases when we have 2 identical values, by adding or subtracting a small increment to the first in the list
        cutpoint_to = np.clip(cutpoint_to, minpix+1, maxpix-1) #this means we can get away with fewer recursions

        if len(lower_points) > (cutpoint_to - minpix) * other_dim:
            sorted_indices = indices[np.argsort(points[indices, dim])]
            while True:
                cutoff_index = np.searchsorted(points[sorted_indices, dim], cutpoint_from, 'right')
                if cutoff_index <= (cutpoint_to - minpix) * other_dim:
                    lower_points = sorted_indices[:cutoff_index]
                    upper_points = sorted_indices[cutoff_index:]
                    break
                # the two highest points still below the cut (highest first)
                below = sorted_indices[[cutoff_index - 1, cutoff_index - 2]]
                if points[below[0], dim] == points[below[1], dim]: #rare: only if points have exactly the same value. If so, shift the upper one up a bit
                    if cutoff_index < len(sorted_indices):
                        # never move past the next point up, so the sort order stays valid
                        nudge = min(0.001, points[sorted_indices[cutoff_index], dim] - points[below[0], dim])
                    else:
                        nudge = 0.001 # no point above the cut to collide with
                    points[below[0], dim] += nudge
                cutpoint_from = np.mean(points[below, dim]) #place new cutpoint between the two points below the current cutpoint

        if len(upper_points) > (maxpix - cutpoint_to) * other_dim:
            sorted_indices = indices[np.argsort(points[indices, dim])]
            while True:
                cutoff_index = np.searchsorted(points[sorted_indices, dim], cutpoint_from, 'left')
                if len(sorted_indices)-cutoff_index <= (maxpix - cutpoint_to) * other_dim:
                    lower_points = sorted_indices[:cutoff_index]
                    upper_points = sorted_indices[cutoff_index:]
                    break
                # the two lowest points at or above the cut (lowest first)
                above = sorted_indices[[cutoff_index, cutoff_index + 1]]
                if points[above[0], dim] == points[above[1], dim]: #rare: only if points have exactly the same value. If so, shift the lower one down a bit
                    if cutoff_index > 0:
                        # never move past the next point down, so the sort order stays valid
                        nudge = min(0.001, points[above[0], dim] - points[sorted_indices[cutoff_index - 1], dim])
                    else:
                        nudge = 0.001 # no point below the cut to collide with
                    points[above[0], dim] -= nudge
                cutpoint_from = np.mean(points[above, dim]) #place new cutpoint between the two points above the current cutpoint


        #transform so that lower set of points runs from minpix .. cutpoint_to instead of minpix ... cutpoint_from
        points[lower_points, dim] = (points[lower_points, dim] - minpix) * (cutpoint_to - minpix)/(cutpoint_from - minpix) + minpix
        #scale so that upper set of points runs from cutpoint_to .. maxpix instead of cutpoint_from ... maxpix
        points[upper_points, dim] = (points[upper_points, dim] - cutpoint_from) * (maxpix - cutpoint_to)/(maxpix - cutpoint_from) + cutpoint_to

    # select_dim is 1 in the position of the cut dimension and 0 in the other,
    # so only the cut coordinate of the corner is replaced by cutpoint_to
    select_dim = np.array([1-dim, dim])
    bisect(points, lower_points, bottom_left, top_right * (1-select_dim) + cutpoint_to * select_dim)
    bisect(points, upper_points, bottom_left * (1-select_dim) + cutpoint_to * select_dim, top_right)


#visualise an example
try:
    import tkinter as tk  # module name on Python 3
except ImportError:
    import Tkinter as tk  # module name on Python 2

# n_pix is the number of points to place; scale is screen pixels per grid cell
n_pix, scale = 500, 15
np.random.seed(12345)
#test on 2 normally distributed point clouds
all_points = np.vstack((np.random.randn(n_pix//2, 2) * 3 + 30, np.random.randn(n_pix//2, 2) * 6  + 2))
#all_points = np.rint(all_points*50).astype(int)/50.0 #test if the algorithm works with rounded
# integer bounding box around the data: the grid the points are fitted to
bl, tr = np.floor(np.min(all_points, 0)), np.ceil(np.max(all_points, 0))

print("{} points to distribute among {} = {} pixels".format(all_points.shape[0], "x".join(np.char.mod("%i", tr-bl)), np.prod(tr-bl)))
if np.prod(tr-bl) > n_pix:
    # bisect() works in place, so keep a copy of the original positions
    pts = all_points.copy()
    bisect(all_points, np.arange(all_points.shape[0]), bl, tr)
    print(np.hstack((pts,all_points)))
    print("Mean distance between original and new point = {}".format(np.mean(np.sqrt(np.sum((pts - all_points)**2, 1)))))

    master = tk.Tk()
    hw = (tr-bl)* scale +1
    # column 1 is drawn along the canvas x axis, column 0 along the y axis
    win = tk.Canvas(master, width=int(hw[1]), height=int(hw[0]))
    win.pack()
    all_points = (all_points-bl) * scale
    pts = (pts-bl) * scale
    # a line joining each original position to its fitted position
    for old, new in zip(pts, all_points):
        win.create_line(int(old[1]), int(old[0]), int(new[1]), int(new[0]))
    # original positions in blue
    for old in pts:
        win.create_oval(int(old[1])-2, int(old[0])-2, int(old[1])+2, int(old[0])+2, fill="blue")
    # fitted positions in red, drawn last so they sit on top
    for new in all_points:
        win.create_oval(int(new[1])-3, int(new[0])-3, int(new[1])+3, int(new[0])+3, fill="red")
    master.mainloop()

像素位移算法可在无像素重叠的情况下显示数万个XY点

2 个答案: