我有一个带有城市名称和GPS坐标的文本文件(UTF-8,~50K行)。示例行:
San Pedro locality -3367 -5968 Argentina Buenos Aires San Pedro
Talagante locality -3366 -7093 Chile Metropolitana Talagante
Peñaflor locality -3362 -7092 Chile Metropolitana Talagante
第三和第四列是最后一列中城市的GPS坐标。
给定一个GPS坐标,我需要找到距离它最近的城市。这样的查询我需要执行数亿次。有哪些工具可以帮助我完成这项任务?最好是Java / Python的解决方案。
答案 0 :(得分:7)
您要找的是KD tree。我在这里找到了一个python implementation的链接,但我不是Python开发人员,从未试用过。KD树支持以平方根复杂度在平面中查找最近点,这可能是您能得到的最好的复杂度。这样您大约可以承受每秒一百万次查询。
编辑:其实您的问题促使我做了一些更深入的研究。您可能会发现this page上介绍的几种可行方法很有用。您需要关注的是针对大量最近邻查询的最优解决方案。
答案 1 :(得分:2)
我会将这些记录存储在SQLite数据库中。有一个名为SpatiaLite的项目,它在SQLite之上添加了空间查询和类型。这使您可以向数据库询问诸如“提供距离x 20米范围内的所有项目”的内容。
如果您不想使用数据库,可以使用quadtree。 Python中有几个实现。四叉树将矩形空间划分为4个部分,然后将每个部分细分为4个部分。搜索非常有效。
答案 2 :(得分:1)
对于您所需的几何查询(最近点),KD Tree是非常合适的数据结构,而且实现起来并不难。我有一个Java实现,它是我当时的一次课程作业,不确定对您的场景来说效率是否足够。Point2D和其他实用工具类的实现见here,您可以在那里查看它们的源代码。RectHV是另一个需要用到的工具类,由我编写。
/**
 * Immutable axis-aligned rectangle [xmin, xmax] x [ymin, ymax].
 *
 * <p>Boundaries are inclusive: a point lying exactly on an edge is contained
 * in the rectangle, and two rectangles that merely touch are considered to
 * intersect.
 */
public class RectHV {
    private final double xmin, ymin;   // minimum x- and y-coordinates
    private final double xmax, ymax;   // maximum x- and y-coordinates

    /**
     * Constructs the axis-aligned rectangle [xmin, xmax] x [ymin, ymax].
     *
     * @throws IllegalArgumentException if {@code xmax < xmin} or {@code ymax < ymin}
     */
    public RectHV(double xmin, double ymin, double xmax, double ymax) {
        if (xmax < xmin || ymax < ymin) {
            throw new IllegalArgumentException("Invalid rectangle");
        }
        this.xmin = xmin;
        this.ymin = ymin;
        this.xmax = xmax;
        this.ymax = ymax;
    }

    // accessor methods for the 4 coordinates
    public double xmin() { return xmin; }
    public double ymin() { return ymin; }
    public double xmax() { return xmax; }
    public double ymax() { return ymax; }

    // width and height of the rectangle
    public double width()  { return xmax - xmin; }
    public double height() { return ymax - ymin; }

    /**
     * Does this rectangle intersect that one? Touching edges or corners count
     * as an intersection because all comparisons are inclusive.
     */
    public boolean intersects(RectHV that) {
        return this.xmax >= that.xmin && this.ymax >= that.ymin
            && that.xmax >= this.xmin && that.ymax >= this.ymin;
    }

    /** Draws the four edges of this rectangle with StdDraw. */
    public void draw() {
        StdDraw.line(xmin, ymin, xmax, ymin);
        StdDraw.line(xmax, ymin, xmax, ymax);
        StdDraw.line(xmax, ymax, xmin, ymax);
        StdDraw.line(xmin, ymax, xmin, ymin);
    }

    /** Distance from p to the closest point of this rectangle (0 if p is inside). */
    public double distanceTo(Point2D p) {
        return Math.sqrt(this.distanceSquaredTo(p));
    }

    /**
     * Squared distance from p to the closest point of this rectangle
     * (0 if p is inside or on the boundary).
     */
    public double distanceSquaredTo(Point2D p) {
        // Per axis, the offset is non-zero only when p lies outside the
        // rectangle's extent on that axis.
        double dx = 0.0, dy = 0.0;
        if      (p.x() < xmin) dx = p.x() - xmin;
        else if (p.x() > xmax) dx = p.x() - xmax;
        if      (p.y() < ymin) dy = p.y() - ymin;
        else if (p.y() > ymax) dy = p.y() - ymax;
        return dx*dx + dy*dy;
    }

    /** Does this rectangle contain p (boundary included)? */
    public boolean contains(Point2D p) {
        return (p.x() >= xmin) && (p.x() <= xmax)
            && (p.y() >= ymin) && (p.y() <= ymax);
    }

    /**
     * Two rectangles are equal when they are of the same class and all four
     * coordinates compare equal with {@code ==}.
     */
    @Override
    public boolean equals(Object y) {
        if (y == this) return true;
        if (y == null) return false;
        if (y.getClass() != this.getClass()) return false;
        RectHV that = (RectHV) y;
        if (this.xmin != that.xmin) return false;
        if (this.ymin != that.ymin) return false;
        if (this.xmax != that.xmax) return false;
        if (this.ymax != that.ymax) return false;
        return true;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: rectangles that
     * compare equal always produce the same hash, so instances can be used as
     * keys in hash-based collections.
     */
    @Override
    public int hashCode() {
        int hash = coordinateHash(xmin);
        hash = 31 * hash + coordinateHash(ymin);
        hash = 31 * hash + coordinateHash(xmax);
        hash = 31 * hash + coordinateHash(ymax);
        return hash;
    }

    // equals() compares with ==, under which -0.0 and 0.0 are equal although
    // their bit patterns (and therefore Double hashes) differ; map both to
    // +0.0 before hashing so equal rectangles never hash differently.
    private static int coordinateHash(double v) {
        double normalized = (v == 0.0) ? 0.0 : v;
        return Double.valueOf(normalized).hashCode();
    }

    /** String form "[xmin, xmax] x [ymin, ymax]". */
    @Override
    public String toString() {
        return "[" + xmin + ", " + xmax + "] x [" + ymin + ", " + ymax + "]";
    }
}
这是KD树:
/**
 * Set of 2D points stored in a 2d-tree, supporting range search and
 * nearest-neighbour queries.
 *
 * <p>Tree levels alternate between splitting on x (even levels) and on y (odd
 * levels). Every node remembers the axis-aligned rectangle that encloses all
 * points of its subtree; that rectangle is what allows range and nearest
 * queries to skip whole subtrees.
 *
 * <p>The tree is never rebalanced, so its depth depends on insertion order,
 * and insert/search/query all recurse once per level.
 *
 * <p>{@link #nearest(Point2D)} keeps its running best candidate in instance
 * fields, so concurrent queries on the same instance would interfere with
 * each other.
 */
public class KdTree {

    // A node with p == null is an empty placeholder; every node that holds a
    // point always has two non-null children (possibly placeholders).
    private static class Node {
        private Point2D p;      // the point
        private RectHV rect;    // the axis-aligned rectangle corresponding to this node
        private Node lb;        // the left/bottom subtree
        private Node rt;        // the right/top subtree
    }

    private Node tree;
    private int sz;

    // scratch state of the nearest-neighbour query currently in progress
    private Point2D nearestPoint;
    private double nearestDist;   // squared distance from the query to nearestPoint

    /** Constructs an empty set of points. */
    public KdTree() {
        tree = new Node();
        sz = 0;
    }

    /** Is the set empty? */
    public boolean isEmpty() {
        return (sz == 0);
    }

    /** Number of points in the set. */
    public int size() {
        return sz;
    }

    /**
     * Inserts p into the subtree rooted at nd unless an equal point is already
     * on the insertion path. [xmin, xmax] x [ymin, ymax] is the region that
     * nd covers; it is narrowed at each level by the splitting coordinate.
     */
    private void privateInsert(Node nd, Point2D p, int lv,
                               double xmin, double ymin, double xmax, double ymax) {
        if (nd.p == null) {
            // reached an empty placeholder: turn it into a real node
            nd.p = p;
            nd.rect = new RectHV(xmin, ymin, xmax, ymax);
            nd.lb = new Node();
            nd.rt = new Node();
            sz = sz + 1;
        }
        else if (p.equals(nd.p)) {
            // already present: a set stores each point once
            return;
        }
        else if (lv % 2 == 0) {
            // even level: split on x
            double split = nd.p.x();
            if (p.x() <= split) {
                privateInsert(nd.lb, p, lv + 1, xmin, ymin, split, ymax);
            }
            else {
                privateInsert(nd.rt, p, lv + 1, split, ymin, xmax, ymax);
            }
        }
        else {
            // odd level: split on y
            double split = nd.p.y();
            if (p.y() <= split) {
                privateInsert(nd.lb, p, lv + 1, xmin, ymin, xmax, split);
            }
            else {
                privateInsert(nd.rt, p, lv + 1, xmin, split, xmax, ymax);
            }
        }
    }

    /**
     * Follows the same path an insertion of p would take and returns either
     * the node holding a point equal to p or the empty placeholder where p
     * would be inserted.
     */
    private Node privateSearch(Node nd, Point2D p, int lv) {
        if (nd.p == null) return nd;
        if (p.equals(nd.p)) return nd;

        boolean goLeftBottom;
        if (lv % 2 == 0) goLeftBottom = p.x() <= nd.p.x();
        else             goLeftBottom = p.y() <= nd.p.y();

        return privateSearch(goLeftBottom ? nd.lb : nd.rt, p, lv + 1);
    }

    /** Draws every point of the subtree rooted at nd (pre-order). */
    private void privateDraw(Node nd) {
        if (nd.p == null) return;
        StdDraw.setPenColor(StdDraw.BLACK);
        StdDraw.setPenRadius(.01);
        StdDraw.point(nd.p.x(), nd.p.y());
        privateDraw(nd.lb);
        privateDraw(nd.rt);
    }

    /** Enqueues every point of the subtree rooted at nd that lies inside rect. */
    private void privateRange(Node nd, RectHV rect, Queue<Point2D> que) {
        if (nd.p == null) return;
        // nd.rect encloses nd.p and the whole subtree, so if the query does
        // not touch it nothing below can match.
        if (!nd.rect.intersects(rect)) return;
        if (rect.contains(nd.p)) que.enqueue(nd.p);
        privateRange(nd.lb, rect, que);
        privateRange(nd.rt, rect, que);
    }

    /**
     * Updates nearestPoint/nearestDist with the best candidate found in the
     * subtree rooted at nd.
     */
    private void privateNearest(Node nd, Point2D p) {
        if (nd.p == null) return;

        double d = p.distanceSquaredTo(nd.p);
        if (d < nearestDist) {
            nearestDist = d;
            nearestPoint = nd.p;
        }

        // Lower bound on the distance to anything stored in each child; an
        // empty child gets +infinity so it is never entered.
        double lbBound = (nd.lb.p != null)
                ? nd.lb.rect.distanceSquaredTo(p) : Double.POSITIVE_INFINITY;
        double rtBound = (nd.rt.p != null)
                ? nd.rt.rect.distanceSquaredTo(p) : Double.POSITIVE_INFINITY;

        // Explore the more promising child first: a good candidate found
        // there often lets the other child be pruned entirely.
        Node first = nd.lb, second = nd.rt;
        double firstBound = lbBound, secondBound = rtBound;
        if (rtBound < lbBound) {
            first = nd.rt;
            second = nd.lb;
            firstBound = rtBound;
            secondBound = lbBound;
        }

        if (firstBound < nearestDist) privateNearest(first, p);
        // nearestDist may have shrunk during the first descent
        if (secondBound < nearestDist) privateNearest(second, p);
    }

    /**
     * Adds the point p to the set (if it is not already in the set).
     *
     * @throws IllegalArgumentException if p is null
     */
    public void insert(Point2D p) {
        if (p == null) {
            throw new IllegalArgumentException("insert: point is null");
        }
        // The root covers the whole plane so that points with arbitrary
        // coordinates (not only those in the unit square) can be stored.
        privateInsert(tree, p, 0,
                Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY,
                Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);
    }

    /**
     * Does the set contain the point p?
     *
     * @throws IllegalArgumentException if p is null
     */
    public boolean contains(Point2D p) {
        if (p == null) {
            throw new IllegalArgumentException("contains: point is null");
        }
        return privateSearch(tree, p, 0).p != null;
    }

    /** Draws all of the points to standard draw. */
    public void draw() {
        privateDraw(tree);
    }

    /**
     * All points in the set that are inside the rectangle (boundary included).
     *
     * @throws IllegalArgumentException if rect is null
     */
    public Iterable<Point2D> range(RectHV rect) {
        if (rect == null) {
            throw new IllegalArgumentException("range: rectangle is null");
        }
        Queue<Point2D> que = new Queue<Point2D>();
        privateRange(tree, rect, que);
        return que;
    }

    /**
     * A nearest neighbor in the set to p; null if the set is empty.
     *
     * @throws IllegalArgumentException if p is null
     */
    public Point2D nearest(Point2D p) {
        if (p == null) {
            throw new IllegalArgumentException("nearest: point is null");
        }
        nearestPoint = null;
        nearestDist = Double.POSITIVE_INFINITY;
        privateNearest(tree, p);
        return nearestPoint;
    }
}
答案 3 :(得分:1)
GeoHash是你的另一个选择,它实现起来非常快,效率很高。