我正在尝试在python中构建一个2-3-4树。到目前为止,插入似乎正在高达3左右的节点。之后,数据似乎被丢弃而不是插入树中。我不确定为什么会发生这种情况,并且多次检查我的代码。我在插入代码附近评论了我从中获得了插入算法。提前感谢您对我的问题的任何见解。
import re
class Node:
def __init__(self, newInfo = None, p = None, q = None,
r = None, s = None, parent = None):
#initilize parent / child links
self.children = [p, q, r, s]
self.parent = parent
#initialize data
self.info = [ newInfo, None, None ]
class TwoThreeFourTree:
def __init__(self):
self.root = Node()
# Built in sort sinks None types to front, which
# is opposite of how info was structured to work,
# Using this sort will push None values to back.
def sortNode(self, curr):
c = []
for i in curr.info:
if i is not None:
c.append(i)
c.sort()
for i in range(3-len(c)):
c.append(None)
return c
# Built in len counts None, this one doesn't.
def length(self, node):
i = 0
for info in node:
if info is not None:
i = i + 1
return i
def isLeaf(self, node):
for child in node.children:
if child is not None:
return False
return True
def isOrphan(self, node):
if node.parent is None:
return True
return False
def lookup(self, userStr, reg):
curr = self.root
while curr is not None:
#Two Node
if self.length(curr.info) is 1:
if re.match(reg, str(curr.info[0])) is not None:
return curr.info[0]
else:
if userStr < curr.info[0]:
curr = curr.children[0]
else:
curr = curr.children[1]
#Three Node
elif self.length(curr.info) is 2:
for item in curr.info:
if re.match(reg, str(item)) is not None:
return item
if userStr < curr.info[0]:
curr = curr.children[0]
elif userStr < curr.info[1]:
curr = curr.children[1]
else:
curr = curr.children[2]
#Four Node
elif self.length(curr.info) is 3:
for item in curr.info:
if item is not None:
if re.match(reg, str(item)) is not None:
return item
if userStr < curr.info[0]:
curr = curr.children[0]
elif userStr < curr.info[1]:
curr = curr.children[1]
elif userStr < curr.info[2]:
curr = curr.children[2]
else:
curr = curr.children[3]
def inorder(self, node, retlst = None):
if retlst is None:
retlst = []
if node.children[0]:
retlst = self.inorder(node.children[0], retlst)
retlst += [node.info[0]]
if node.children[1]:
retlst = self.inorder(node.children[1], retlst)
retlst += [node.info[1]]
if node.children[2]:
retlst = self.inorder(node.children[2], retlst)
retlst += [node.info[2]]
if node.children[3]:
retlst = self.inorder(node.children[3], retst)
return retlst
## Using Algorithm from: http://www.clear.rice.edu/comp212/01-fall/lectures/33/
def insert(self, info, node):
curr = node
if self.length(curr.info) == 0: # curr is empty
curr.info[0] = info
return True
elif self.length(curr.info) == 1: # curr is two node.
if self.isLeaf(curr):
curr.info[1] = info
curr.info = self.sortNode(curr)
return True
else:
if info < curr.info[0]:
self.insert(info, curr.children[0])
else:
self.insert(info, curr.children[1])
elif self.length(curr.info) == 2: # curr is 3 node
if self.isLeaf(curr):
curr.info[2] = info
curr.info = self.sortNode(curr)
return True
else:
if info < curr.info[0]:
self.insert(info, curr.children[0])
elif info < curr.info[1]:
self.insert(info, curr.children[1])
elif info > curr.info[2]:
self.insert(info, curr.children[2])
elif self.length(curr.info) == 3: # curr is 4 node
if self.isOrphan(curr): # curr has no parent
curr = Node(curr.info[1],
Node(curr.info[0], curr.children[0], curr.children[1]),
Node(curr.info[2], curr.children[2], curr.children[3]))
for child in curr.children:
if child is not None:
child.parent = curr
self.root = curr
self.insert(info, self.root)
else: #curr has a parent
if self.length(curr.parent.info) == 1: # Parent is Two Node:
if curr.parent.children[0] == curr:# cur is lst.
#If P = [curr, M, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M, p-rst].
curr.parent.info[1] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[2] = curr.parent.children[1]
curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[1] == curr: # curr is rst.
#If P = [p-lst, M, curr], then P becomes [p-lst, M, [lst, X, mlst], Y, [mrst, Z, rst]].
curr.parent.info[1] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif self.length(curr.parent.info) == 2: # Parent is Three Node:
if curr.parent.children[0] == curr: # curr is lst
#If P = [curr, M1, p-mst, M2, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M1, p-mst, M2, p-rst].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = curr.parent.children[2]
curr.parent.children[2] = curr.parent.children[1]
curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[1] == curr: # curr is mst
#If P = [p-lst, M1, curr, M2, p-rst], then P becomes [p-lst, M1,[lst, X, mlst], Y, [mrst, Z, rst], M2, p-rst].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = curr.parent.children[2]
curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[2] == curr: # curr is rst
#If P = [p-lst, M1, p-mst, M2, curr], then P becomes [p-lst, M1, p-mst, M2, [lst, X, mlst], Y, [mrst, Z, rst]].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[2] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
答案 0 :(得分:2)
作为部分答案,您可以从消除代码中的大部分重复开始。例如,对于三种不同的节点类型,您不需要特殊情况lookup()
函数。你可以改写这样的东西:
def lookup(self, s, r, node=None):
if not node: node = self.root
for item in node.info:
if re.match(r, str(item)):
return item
for (i, item) in enumerate(curr.info):
if s < item:
return self.lookup(s, r, curr.children[i])
return self.lookup(s, r, curr.children[-1]) # Assume len(children) == len(info) + 1
此外,遍历应该作为生成器实现,并再次使用循环而不是重复代码:
def inorder(self, node):
for (i, item) in enumerate(node.info):
if node.children[i]: self.inorder(node.children[i])
yield item
if node.children[-1]: self.inorder(node.children[-1])
这种清理首先会让其他人更容易诊断问题。其次,您可能会发现问题在此过程中消失了。