我尝试编写Python 3.4代码来从外部索引文本文档 这是我的尝试。运行时出现错误信息:
未定义原始输入
我想要的是:
代码:
import string
def RemovePunc():
line = []
i = 0
text_input = ""
total_text_input = "C:Users\Kelil\Desktop\IRS_Assignment\project.txt"
#This part removes the punctuation and converts input text to lowercase
while i != 1:
text_input = raw_input
if text_input == ".":
i = 1
else:
new_char_string = ""
for char in text_input:
if char in string.punctuation:
char = " "
new_char_string = new_char_string + char
line = line + [new_char_string.lower()]
#This is a list with all of the text that was entered in
total_text_input = (total_text_input + new_char_string).lower()
return line
def RemoveStopWords(line):
line_stop_words = []
stop_words = ['a','able','about','across','after','all','almost','also','am','among',
'an','and','any','are','as','at','be','because','been','but','by','can',
'cannot','could','dear','did','do','does','either','else','ever','every',
'for','from','get','got','had','has','have','he','her','hers','him','his',
'how','however','i','if','in','into','is','it','its','just','least','let',
'like','likely','may','me','might','most','must','my','neither','no','nor',
'not','of','off','often','on','only','or','other','our','own','rather','said',
'say','says','she','should','since','so','some','than','that','the','their',
'them','then','there','these','they','this','tis','to','too','twas','us',
'wants','was','we','were','what','when','where','which','while','who',
'whom', 'why', 'will', 'with', 'would', 'yet', 'you', 'your']
#this part removes the stop words for the list of inputs
line_stop_words = []
sent = ""
word = ""
test = []
for sent in line:
word_list = string.split(sent)
new_string = ""
for word in word_list:
if word not in stop_words:
new_string = new_string + word + " "
new_string = string.split(new_string)
line_stop_words = line_stop_words + [new_string]
return(line_stop_words)
def StemWords(line_stop_words):
leaf_words = "s","es","ed","er","ly","ing"
i=0
while i < 6:
count = 0
length = len(leaf_words[i])
while count < len(line_stop_words):
line = line_stop_words[count]
count2 = 0
while count2 < len(line):
#line is the particular list(or line) that we are dealing with, count if the specific word
if leaf_words[i] == line[count2][-length:]:
line[count2] = line[count2][:-length]
count2 = count2 + 1
line_stop_words[count] = line
count2 = 0
count = count + 1
count = 0
i = i + 1
return(line_stop_words)
def indexDupe(lineCount,occur):
if str(lineCount) in occur:
return True
else:
return False
def Indexing(line_stop_words):
line_limit = len(line_stop_words)
index = []
line_count = 0
while line_count < line_limit:
for x in line_stop_words[line_count]:
count = 0
while count <= len(index):
if count == len(index):
index = index + [[x,[str(line_count+1)]]]
break
else:
if x == index[count][0]:
if indexDupe(line_count+1,index[count][1]) == False:
index[count][1] += str(line_count+1)
break
count = count + 1
line_count = line_count + 1
return(index)
def OutputIndex(index):
print ("Index:")
count = 0
indexLength = len(index)
while count < indexLength:
print (index[count][0],)
count2 = 0
lineOccur = len(index[count][1])
while count2 < lineOccur:
print (index[count][1][count2],)
if count2 == lineOccur -1:
print ("")
break
else:
print (",",)
count2 += 1
count += 1
line = RemovePunc()
line_stop_words = RemoveStopWords(line)
line_stop_words = StemWords(line_stop_words)
index = Indexing(line_stop_words)
OutputIndex(index)
答案 0 :(得分:0)
@smichak已经在评论中提出了正确的答案。在Python 3中,raw_input
被重命名为input
。所以你想要:
text_input = input()
不要忘记这些括号,因为你想调用这个函数。