我已经创建了一个函数,我是否计算了文件中每个单词的使用次数,即单词频率。现在,该函数可以计算所有单词的总和,并向我显示七个最常用的单词以及它们被使用了多少次。现在我想比较我的第一个文件我已经用另一个文件分析了单词频率我是否有英语中最常用的单词,我想将这些单词与我在第一个文件中的单词进行比较,看看是否任何单词匹配。
我所要做的是制作两个文件的列表,然后将它们相互比较。但是我为此写的代码并没有给我任何输出,关于如何解决这个问题的任何想法?
def CountWords():
filename = input('What is the name of the textfile you want to open?: ')
if filename == "alice" or "alice-ch1.txt" or " ":
file = open("alice-ch1.txt","r")
print('You want to open alice-ch1.txt')
wordcount = {}
for word in file.read().split():
if word not in wordcount:
wordcount[word] = 1
else:
wordcount[word] += 1
wordcount = {k.lower(): v for k, v in wordcount.items() }
print (wordcount)
sum = 0
for val in wordcount.values():
sum += val
print ('The total amount of words in Alice adventures in wonderland: ' + str(sum))
sortList = sorted(wordcount.values(), reverse = True)
most_freq_7 = sortList[0:7]
#print (most_freq_7)
print ('Totoro says: The 7 most common words in Alice Adventures in Wonderland:')
print(list(wordcount.keys())[list(wordcount.values()).index(most_freq_7[0])] + " " + str(most_freq_7[0]))
print(list(wordcount.keys())[list(wordcount.values()).index(most_freq_7[1])] + " " + str(most_freq_7[1]))
print(list(wordcount.keys())[list(wordcount.values()).index(most_freq_7[2])] + " " + str(most_freq_7[2]))
print(list(wordcount.keys())[list(wordcount.values()).index(most_freq_7[3])] + " " + str(most_freq_7[3]))
print(list(wordcount.keys())[list(wordcount.values()).index(most_freq_7[4])] + " " + str(most_freq_7[4]))
print(list(wordcount.keys())[list(wordcount.values()).index(most_freq_7[5])] + " " + str(most_freq_7[5]))
print(list(wordcount.keys())[list(wordcount.values()).index(most_freq_7[6])] + " " + str(most_freq_7[6]))
file_common = open("common-words.txt", "r")
commonwords = []
contents = file_common.readlines()
for i in range(len(contents)):
commonwords.append(contents[i].strip('\n'))
print(commonwords)
#From here's the code were I need to find out how to compare the lists:
alice_keys = wordcount.keys()
result = set(filter(set(alice_keys).__contains__, commonwords))
newlist = list()
for elm in alice_keys:
if elm not in result:
newlist.append(elm)
print('Here are the similar words: ' + str(newlist)) #Why doesn't show?
else:
print ('I am sorry, that filename does not exist. Please try again.')
答案 0 :(得分:0)
我不在口译员面前,所以我的代码可能会稍微偏离。但尝试更像这样的东西。
from collections import Counter
with open("some_file_with_words") as f_file
counter = Counter(f_file.read())
top_seven = counter.most_common(7)
with open("commonwords") as f_common:
common_words = f_common.read().split()
for word, count in top_seven:
if word in common_words:
print "your word " + word + " is in the most common words! It appeared " + str(count) + " times!"