我正在研究一个读取数据文件的索引词典,并在AVL树中记录每个唯一的单词和单词的行号。问题是我的find方法没有在树中找到Entry,所以它添加了每个单词而不是每个唯一单词。
我也无法让我的程序保留每个条目中的行号列表。我正在使用一个入门类来保存密钥(字)和行号列表。谢谢你的帮助。
我正在用Python 2.7编写,到目前为止已经包含了我的所有程序。
我的主要计划:
import string #NEW
from time import clock
import sys #for BST recursion limit
from dictionary import Entry
sys.setrecursionlimit(3000)#for BST
from avl import AVL
def main():
"""Calls on necessary functions to fill the dictionary, and process the keys"""
start = clock() #times runtime
stopWordDict = AVL()#Empty Dictionary
stopWordDict = fillStopWordDict(stopWordDict)
keyList = []
wordConcordanceDict = AVL()#Empty Dictionary
wordConcordanceDict = fillWordDict(stopWordDict,wordConcordanceDict, keyList)
print str(wordConcordanceDict) #wordconcorddict made here.
keyList.sort()
print keyList
writeWordConDict(wordConcordanceDict, keyList)
end = clock() #gets runtime
runTime = end - start
print("Done. Runtime was:",runTime,"seconds.")
def fillStopWordDict(stopWordDict):
"""fills chain dict with all of the stop words"""
fileNew=open('stop_words.txt', "r")
for word in fileNew:
word=word.lower().strip() #strip will strip \n from word
if stopWordDict.find(word) == None:
stopWordDict.add(word)
fileNew.close()
return stopWordDict
def fillWordDict(stopWordDict,wordConcordanceDict, keyList):
"""opens hw5data.txt and calls on processLine function"""
lineCounter = 1
fileNew=open('hw5data.txt', "r")
for line in fileNew:
processLine(lineCounter, line, stopWordDict,wordConcordanceDict, keyList)
lineCounter+=1 #changes to next line of file
fileNew.close()
return wordConcordanceDict
def processLine(lineCounter, line, stopWordDict,wordConcordanceDict, keyList):
"""process each line into the wordConcordanceDict"""
line=line.split() #splits line into list of words
for word in line:
word=word.lower().strip(string.punctuation)#strips punctuation
if stopWordDict.find(word) == None:
wordEntry = Entry(word, None)
if wordConcordanceDict.find(wordEntry) == None:
lineList = wordEntry.value
lineList.append(lineCounter)
wordEntry.value = lineList
wordConcordanceDict.add(wordEntry)
keyList.append(word)
else:
wordEntry = wordConcordance.find(wordEntry)
lineList = wordEntry.value
lineList.append(lineCounter)
wordEntry.value = lineList
wordConcordanceDict.add(wordEntry)
return wordConcordanceDict
def writeWordConDict(wordConcordanceDict, keyList):
"""takes in wordConcordanceDict and list of its keys. Then prints the key value pairs to the screen"""
fileNew=open("ProgProj5Concordance.txt", 'w')
# listOfWords = wordConcordanceDict.inorder()
for key in keyList:
wordEntry = wordConcordanceDict.find(key) #makes the values into a string
lineList = wordEntry.value
line=str(key + ":" + lineList + "\n")
fileNew.write(line)
fileNew.close()
main()
MY ENTRY CLASS: “”” 文件:bst.py 二叉搜索树的BST类。 “”“
from queue import LinkedQueue
from binarytree import BinaryTree
class BST(object):
def __init__(self):
self._tree = BinaryTree.THE_EMPTY_TREE
self._size = 0
def isEmpty(self):
return len(self) == 0
def __len__(self):
return self._size
def __str__(self):
return str(self._tree)
def __iter__(self):
return iter(self.inorder())
def find(self, target):
"""Returns data if target is found or None otherwise."""
def findHelper(tree):
if tree.isEmpty():
return None
elif target == tree.getRoot():
return tree.getRoot()
elif target < tree.getRoot():
return findHelper(tree.getLeft())
else:
return findHelper(tree.getRight())
return findHelper(self._tree)
def add(self, newItem):
"""Adds newItem to the tree."""
# Helper function to search for item's position
def addHelper(tree):
currentItem = tree.getRoot()
left = tree.getLeft()
right = tree.getRight()
# New item is less, go left until spot is found
if newItem < currentItem:
if left.isEmpty():
tree.setLeft(BinaryTree(newItem))
else:
addHelper(left)
# New item is greater or equal,
# go right until spot is found
elif right.isEmpty():
tree.setRight(BinaryTree(newItem))
else:
addHelper(right)
# End of addHelper
# Tree is empty, so new item goes at the root
if self.isEmpty():
self._tree = BinaryTree(newItem)
# Otherwise, search for the item's spot
else:
addHelper(self._tree)
self._size += 1
def inorder(self):
"""Returns a list containing the results of
an inorder traversal."""
lyst = []
self._tree.inorder(lyst)
return lyst
def preorder(self):
"""Returns a list containing the results of
a preorder traversal."""
# Exercise
pass
def postorder(self):
"""Returns a list containing the results of
a postorder traversal."""
# Exercise
pass
def levelorder(self):
"""Returns a list containing the results of
a levelorder traversal."""
# Exercise
pass
def remove(self, item):
# Exercise
pass
def main():
tree = BST()
print "Adding D B A C F E G"
tree.add("D")
tree.add("B")
tree.add("A")
tree.add("C")
tree.add("F")
tree.add("E")
tree.add("G")
print tree.find("A")
print tree.find("Z")
print "\nString:\n" + str(tree)
print "Iterator (inorder traversal): "
iterator = iter(tree)
while True:
try:
print iterator.next(),
except Exception, e:
print e
break
# Use a for loop instead
print "\nfor loop (inorder traversal): "
for item in tree:
print item,
if __name__ == "__main__":
main()
最后是BINARY TREE AVL CLASS:
from binarytree import *
class BinaryTreeAVL(BinaryTree):
def __init__(self, item, balance = 'EQ'):
BinaryTree.__init__(self, item)
self._balance = balance
def getBalance(self):
return self._balance
def setBalance(self, newBalance):
self._balance = newBalance
def __str__(self):
"""Returns a string representation of the tree
rotated 90 degrees to the left."""
def strHelper(tree, level):
result = ""
if not tree.isEmpty():
result += strHelper(tree.getRight(), level + 1)
result += "| " * level
result += str(tree.getRoot())+ " : " + tree.getBalance() + "\n"
result += strHelper(tree.getLeft(), level + 1)
return result
return strHelper(self, 0)