所以我应该编写一些代码来接收一些文本行,通过删除所有非关键词,标点符号等来重新编写它们,最后打印每个关键字以及它出现在哪些行上。有一次,我的代码正在工作,似乎没有改变任何东西,我开始得到这个错误,我不知道为什么。我不是在寻找使代码更短或更短的方法,我意识到它远非理想。我想知道如何解决这个错误。
EDIT-错误在第58行,如果word不在索引中,则在createindex中:
以下是代码:
from string import *
# Program to index sentences
stopWords = [ "a", "i", "it", "am", "at", "on", "in", "to", "too", "very", \
"of", "from", "here", "even", "the", "but", "and", "is", "my", \
"them", "then", "this", "that", "than", "though", "so", "are" ]
punctuation = [".",",",":",";","!","?","&","'"]
stemming=["s","es","ed","er","ly","ing"]
text={}
reworkedtext={}
def inserttext(text): #Function to insert lines of text
linecount=1
print "Please insert text here:"
line = ""
while line!=".":
line = raw_input()
text[linecount]=line
linecount+=1
def reworktext(text): #Reworks the text by removing punctuation and making everything lowercase
for line in text:
reworkedtext[line]=""
for character in range(0,len(text[line])):
if text[line][character] not in punctuation:
reworkedtext[line]=reworkedtext[line]+lower(text[line][character])
def removestopwords(reworkedtext): #Removes stopwords
for line in reworkedtext:
wordcount=0
reworkedtext[line]=split(reworkedtext[line])
for word in range(0,len(reworkedtext[line])):
if reworkedtext[line][wordcount] in stopWords:
del(reworkedtext[line][wordcount])
else:
wordcount+=1
def stemwords(reworkedtext): #Stems all words
for line in reworkedtext:
for word in range(0,len(reworkedtext[line])):
if reworkedtext[line][word][-2:] in stemming:
reworkedtext[line][word]=reworkedtext[line][word][:-2]
if reworkedtext[line][word][-3:] in stemming:
reworkedtext[line][word]=reworkedtext[line][word][:-3]
if reworkedtext[line][word][-1:] in stemming:
reworkedtext[line][word]=reworkedtext[line][word][:-1]
def createindex(reworkedtext): #creates index and prints it
linecount=1
for line in reworkedtext:
for word in range(0,len(reworkedtext[line])):
if word not in index:
index[word]=""
index[word]=str(line)
linecount+=1
elif index[word]!=str(line):
index[word]=index[word]+", "+str(line)
for words in index:
print words, index[words]
inserttext(text)
reworktext(text)
removestopwords(reworkedtext)
stemwords(reworkedtext)
createindex(reworkedtext)
答案 0 :(得分:1)
好像你忘了初始化索引词典
def createindex(reworkedtext): #creates index and prints it
linecount=1
index = {} # <----------- add this line and see what's what :)
for line in reworkedtext:
for word in range(0,len(reworkedtext[line])):
if word not in index:
index[word]=""
index[word]=str(line)
linecount+=1
elif index[word]!=str(line):
index[word]=index[word]+", "+str(line)
for words in index:
print words, index[words]