任何输入都将非常感激。我已经被困在这一段很长一段时间了,有点绝望。下面是我的代码的一部分,它应该读取文本文件并打印关键字以及该关键字的出现次数。我创建了一个带键但没有值的字典。当我尝试添加值时,我收到此错误消息:“TypeError:list indices必须是整数或切片,而不是dict”。为这行代码生成错误:
position_term = {uat_dic[key_word], word_position}'.
我可以根据需要回答任何问题或提供其他信息。非常感谢你给予的任何帮助。
import string
def main():
# Call a function to create the keyword frequency dictionary
create_keyword_frequency_dictionary()
# Call a function to calculate the keyword properties: frequency and position
keyword_frequency, keyword_position = calculate_keyword_properties()
# Call a function to display the results as in the Sample Output
display_results(keyword_frequency, keyword_position)
def create_keyword_frequency_dictionary():
# Open the uat_voc.txt file and create a dictionary where
# the key is the term found in the file, and the value is initialized to 0.
# This function only initializes the dictionary.
# Values for the keyword frequencies are set by the calculate_keyword_properties function.
#easy to use variable name that holds the file path, can easily change the file location here
file_name = "/Users/ccs/Library/Mobile Documents/com~apple~TextEdit/Documents/uat_voc.txt"
#create an empty dictionary
keyword_frequency_dictionary = []
#wrap the i/o in a try/catch statement
try:
#open the file and load it into infile
infile = open(file_name, 'r')
#for each line in infile
for line in infile:
#take the line, slice off the \n character
line = line[:-1]
#create a dictionary term where the key is the word in line, initialize value to 0
dic_term = {line: 0}
#add the dictionary term to the dictionary list
keyword_frequency_dictionary.append(dic_term)
#close the infile
infile.close()
# Catch IO Errors, with the File Not Found error the primary possible problem to detect.
except FileNotFoundError:
print("File not found when attempting to read", file_name)
return None
except IOError:
print("Error in data file when reading", file_name)
infile = None
infile.close()
return None
return keyword_frequency_dictionary
def calculate_keyword_properties():
# Open the HowBigDataIsChangingAstronomy.txt' file
# Read each line in the file and normalize the text as in Assignment 3
# For each word, determine if it is in the keyword_frequency dictionary, i.e, it is a keyword in the UAT vocabulary,
# If the word is a UAT keyword, then increment the frequency.
# For each word, if it is the first occurrence in the file, then save its position in a keyword_position dictionary
# Open the HowBigDataIsChangingAstronomy.txt' file
infile = open('/Users/ccs/Library/Mobile Documents/com~apple~TextEdit/Documents/HowBigDataIsChangingAstronomy.txt','r')
#read the entire file into clean_file and normalize (this file still has '' instead of ---) length = 2982
clean_file = remove_punctuation(infile.read())
#create empty list that will fill with words
clean_list_of_words = []
#create the keyword dictionary
uat_dic = create_keyword_frequency_dictionary()
#create the keyword_position dictionary
keyword_position_dictionary = []
print(clean_file)
print(len(clean_file))
#reparse the file and remove all '' from normalized file (length = 2972)
for line in clean_file:
for word in line.split():
clean_list_of_words.append(word)
print(len(clean_list_of_words))
#iterate through the clean list of words one word at a time to compare; store the word in word_position
for word_position in clean_list_of_words:
#iterate through the dictionary so you can compare the word to each entry
for key_word in iter(uat_dic):
#IDK what was happening here, i think this is wrong
list_word = clean_list_of_words.index(word_position)
dic_word = uat_dic.index(key_word)
#compare the word from word_position to each key value in the uat_dictionary
if list_word == dic_word:
#if it was a match, get the value, store it in frequency
frequency = uat_dic.index(key_word)
#increment frequency
frequency+=1
#check to see if this was the first occurance of the term
if frequency == 1:
#if this was the first occurrence, then store a dictionary key/value pair with word_position as the place in the document
#uat_dic = {uat_dic[key_word], word_position}
position_term = {uat_dic[key_word], word_position}
#add the dictionary pair to the key word position list
keyword_position_dictionary.append(position_term)
#update the frequency value in the uat dictionary
uat_dic[key_word] = frequency
#keeps looping through for every word in the text document
#return the uat frequency dictionary and the keyword position dictionary
return uat_dic, keyword_position_dictionary
# For each word, determine if it is in the keyword_frequency dictionary, i.e, it is a keyword in the UAT vocabulary,
# If the word is a UAT keyword, then increment the frequency.
# For each word, if it is the first occurrence in the file, then save its position in a keyword_position dictionary
答案 0 :(得分:0)
有时你只需要重构:
def count_word(file_name, word):
return file(file_name).read().count(word)
这将计算文件中word
的出现次数,这就是您尝试做的事情。
由于您要求计算关键字列表,所以应该这样做:
import collections # this is from the standard library
def count_word(file_name, words):
'''
Takes a file name and a list of words to count.
'''
# initialize a counter
cnt = collections.Counter()
# tokenize the file
tokens = file(file_name).read().strip().split()
for token in tokens:
if token in words:
# increment the counter for that token
cnt[token] += 1
return cnt