我在Python中编写的四叉树类有一个问题。我使用四叉树有效地交叉关联大型(100,000多个条目)数据目录,并且我从四叉树中得到的匹配记录省略了目录的大区域。这是整个四叉树代码。与匹配源相关的函数是nearersource()和nearestsource(),
import math
import geom_utils as gu
MAX = 60
class Quadtree(object):
"""
Quadtree base class. Only functions that are agnostic to
the type of coordinate system or source object used. Must
use a subclass.
"""
def __init__(self, xmin, ymin, xmax, ymax):
self.top = Node(xmin, ymin, xmax, ymax)
def inserttonode(self, node, source):
if len(node.contents) == MAX:
self.subdivide(node)
if node.q1:
self.inserttoquad(node, source)
else:
# If no subquads exist add source to the list in CONTENTS element
node.contents.append(source)
def inserttoquad(self, node, source):
if source.x >= node.xmid:
if source.y >= node.ymid:
quadrant = node.q1
else:
quadrant = node.q4
else:
if source.y >= node.ymid:
quadrant = node.q2
else:
quadrant = node.q3
self.inserttonode(quadrant, source)
def subdivide(self, node):
node.q1 = Node(node.xmid, node.ymid, node.xmax, node.ymax)
node.q2 = Node(node.xmin, node.ymid, node.xmid, node.ymax)
node.q3 = Node(node.xmin, node.ymin, node.xmid, node.ymid)
node.q4 = Node(node.xmid, node.ymin, node.xmax, node.ymid)
# Pop the list and insert the sources as they come off
while node.contents:
self.inserttoquad(node, node.contents.pop())
def match(self, x, y):
return self.nearestsource(self, x, y)
def nearestsource(self, tree, x, y):
nearest = {'source':None, 'dist':0}
nearest['dist'] = self.initial_dist(tree.top.xmax, tree.top.xmin,
tree.top.ymax, tree.top.ymin)
interest = {'xmin':x-nearest['dist'], 'ymin':y-nearest['dist'],
'xmax':x+nearest['dist'], 'ymax':y+nearest['dist']}
interest = gu.clip_box(interest['xmin'], interest['ymin'],
interest['xmax'], interest['ymax'],
tree.top.xmin, tree.top.ymin,
tree.top.xmax, tree.top.ymax)
nearest['dist'] = nearest['dist']*nearest['dist']
self.nearersource(tree, tree.top, x, y, nearest, interest)
return nearest['source']
def nearersource(self, tree, node, x, y, nearest, interest):
self.num_nearersources+=1
if gu.intersecting((node.xmin), (node.xmax),
(node.ymin), (node.ymax),
(interest['xmin']), (interest['xmax']),
(interest['ymin']), (interest['ymax'])):
if node.q1 == None:
for s in node.contents:
s_dist = self.norm2(s.x, s.y, x, y)
if s_dist < nearest['dist']:
nearest['source'] = s.source
nearest['dist'] = s_dist
dist = math.sqrt(s_dist)
interest['xmin'] = x - dist
interest['ymin'] = y - dist
interest['xmax'] = x + dist
interest['ymax'] = y + dist
interest = gu.clip_box(interest['xmin'], interest['ymin'],
interest['xmax'], interest['ymax'],
tree.top.xmin, tree.top.ymin,
tree.top.xmax, tree.top.ymax)
else:
self.nearersource(tree, node.q1, x, y, nearest, interest)
self.nearersource(tree, node.q2, x, y, nearest, interest)
self.nearersource(tree, node.q3, x, y, nearest, interest)
self.nearersource(tree, node.q4, x, y, nearest, interest)
class Node(object):
def __init__(self, xmin, ymin, xmax, ymax):
self.xmin = float(xmin)
self.ymin = float(ymin)
self.xmax = float(xmax)
self.ymax = float(ymax)
self.xmid = float((self.xmin + self.xmax)/2.0)
self.ymid = float((self.ymin + self.ymax)/2.0)
self.q1 = self.q2 = self.q3 = self.q4 = None
self.contents = []
class Point(object):
"""
The point of Point (heh.) is to have a uniform object that
can be passed around the Quadtree. This makes for
easy switching between equatorial and pixel coordinate
systems or different objects.
"""
def __init__(self, source, x, y):
self.source = source
self.x = float(x)
self.y = float(y)
class ScamPixelQuadtree(Quadtree):
def __init__(self, xmin, ymin, xmax, ymax):
super(ScamPixelQuadtree, self).__init__(xmin, ymin, xmax, ymax)
def insert(self, source):
self.inserttonode(self.top, Point(source, source.ximg, source.yimg))
def norm2(self, x1, y1, x2, y2):
return gu.pixnorm2(x1, y1, x2, y2)
def initial_dist(self, x2, x1, y2, y1):
return min(x2 - x1, y2 - y1)/1000.0
class ScamEquatorialQuadtree(Quadtree):
def __init__(self, xmin, ymin, xmax, ymax):
super(ScamEquatorialQuadtree, self).__init__(xmin, ymin, xmax, ymax)
def insert(self, source):
self.inserttonode(self.top, Point(source, source.ra, source.dec))
def norm2(self, x1, y1, x2, y2):
return gu.equnorm2(x1, y1, x2, y2)
def initial_dist(self, x2, x1, y2, y1):
return min(x2 - x1, y2 - y1)/100.0
我尝试使MAX值非常大,实际上将树变成了一个花哨的数组,匹配工作正常(虽然很慢)。所以我认为问题可能出在nearersource()函数中,这个函数控制我们是否会进入一个区域进行实际比较,但我无法弄清楚是什么。
我在C中编写了相同的代码,完全相同的算法,并且它工作正常。没有地区遗漏。因此,我一直在试图考虑语言特定的原因,为什么算法在C语言中起作用而在Python中起作用。我想到的第一件事就是精度,所以我尝试使用BigFloat包来代码中控制代码是否进入某个区域的部分,但它没有帮助。 C或Python中是否存在可能导致Python出现此问题的其他差异?