我正在尝试读取一个大文本文件(1.08 MB,116253个单词),我遇到一个问题,程序停止在文本文件的大约1/20的位置(在{{1}中停止函数,甚至不执行Read
语句。)
这是我现在拥有的功能:
print
给我的binary_search_tree和tree_node,因为这是一个家庭作业,所以我假设它是有效的,因为它之前已被使用过。
有没有人知道问题是什么?
二进制搜索树:
from binary_search_tree import BinarySearchTree
from tree_node import TreeNode
import sys
sys.setrecursionlimit(5000000)
MovieTree = BinarySearchTree()
def Read(filename):
file = open('movieData.txt')
for line in file:
MovieTree[line.strip()] = line
print(line)
file.close()
Read('movieData.txt')
print(MovieTree)
树节点:
from tree_node import TreeNode
class BinarySearchTree:
def __init__(self):
self.root = None
self.size = 0
def length(self):
return self.size
def __len__(self):
return self.size
def __iter__(self):
return self.root.__iter__()
def __str__(self):
"""Returns a string representation of the tree
rotated 90 degrees counter-clockwise"""
def strHelper(root, level):
resultStr = ""
if root:
resultStr += strHelper(root.rightChild, level+1)
resultStr += "| " * level
resultStr += str(root.key) + "\n"
resultStr += strHelper(root.leftChild, level+1)
return resultStr
return strHelper(self.root, 0)
def __contains__(self,key):
if self._get(key,self.root):
return True
else:
return False
def get(self,key):
if self.root:
res = self._get(key,self.root)
if res:
return res.payload
else:
return None
else:
return None
def _get(self,key,currentNode):
if not currentNode:
return None
elif currentNode.key == key:
return currentNode
elif key < currentNode.key:
return self._get(key,currentNode.leftChild)
else:
return self._get(key,currentNode.rightChild)
def __getitem__(self,key):
return self.get(key)
def __setitem__(self,k,v):
self.put(k,v)
def put(self,key,val):
if self.root:
self._put(key,val,self.root)
else:
self.root = TreeNode(key,val)
self.size = self.size + 1
def _put(self,key,val,currentNode):
if key < currentNode.key:
if currentNode.hasLeftChild():
self._put(key,val,currentNode.leftChild)
else:
currentNode.leftChild = TreeNode(key,val,
parent=currentNode)
else:
if currentNode.hasRightChild():
self._put(key,val,currentNode.rightChild)
else:
currentNode.rightChild = TreeNode(key,val,
parent=currentNode)
def delete(self,key):
if self.size > 1:
nodeToRemove = self._get(key,self.root)
if nodeToRemove:
self.remove(nodeToRemove)
self.size = self.size-1
else:
raise KeyError('Error, key not in tree')
elif self.size == 1 and self.root.key == key:
self.root = None
self.size = self.size - 1
else:
raise KeyError('Error, key not in tree')
def __delitem__(self,key):
self.delete(key)
def remove(self,currentNode):
if currentNode.isLeaf(): #leaf
if currentNode == currentNode.parent.leftChild:
currentNode.parent.leftChild = None
else:
currentNode.parent.rightChild = None
elif currentNode.hasBothChildren(): #interior
succ = currentNode.findSuccessor()
succ.spliceOut()
currentNode.key = succ.key
currentNode.payload = succ.payload
else: # this node has one child
if currentNode.hasLeftChild():
if currentNode.isLeftChild():
currentNode.leftChild.parent = currentNode.parent
currentNode.parent.leftChild = currentNode.leftChild
elif currentNode.isRightChild():
currentNode.leftChild.parent = currentNode.parent
currentNode.parent.rightChild = currentNode.leftChild
else:
currentNode.replaceNodeData(currentNode.leftChild.key,
currentNode.leftChild.payload,
currentNode.leftChild.leftChild,
currentNode.leftChild.rightChild)
else:
if currentNode.isLeftChild():
currentNode.rightChild.parent = currentNode.parent
currentNode.parent.leftChild = currentNode.rightChild
elif currentNode.isRightChild():
currentNode.rightChild.parent = currentNode.parent
currentNode.parent.rightChild = currentNode.rightChild
else:
currentNode.replaceNodeData(currentNode.rightChild.key,
currentNode.rightChild.payload,
currentNode.rightChild.leftChild,
currentNode.rightChild.rightChild)
答案 0 :(得分:1)
此处也发生堆栈溢出崩溃。
由于您的代码不包含递归,因此它必须在您老师给您的课程中。我认为溢出发生在put(self, key, val)
方法中。
我担心我几乎不懂Python,所以我不能给你任何进一步的帮助。
答案 1 :(得分:1)
1.08 MB是一个小文件。为什么要使用BinarySearchTree?您可以通过将数据转储到字典中来轻松处理此问题。
如果BinarySearchTree是作业的一部分,它看起来它有bug。我将从_put()方法开始跟踪。
顺便说一句,你不应该使用sys.setrecursionlimit(5000000)
来解决问题。除非您的数据大小在2 ^ 5000000的数量级,否则合适的二进制搜索不会达到此限制。对于116253个单词,你有一个平衡的二叉树应该只需要12级递归。