这是我到目前为止的代码。我想打印一个字典形式的单词计数,即每个单词在 .txt 文件中出现的次数。但我遇到了一个问题:结果中会出现一些不是英语的单词。
from collections import Counter

# Punctuation, symbols and digits that must never be counted as part of a word.
dropChars = "*!@#$%ˆ&()½_+-={}[]|\\:;\"’<>,.?/1234567890"
# Each dropped character becomes a space, so it also acts as a word separator.
dropDict = {c: ' ' for c in dropChars}
dropTable = str.maketrans(dropDict)


def count_words(lines):
    """Count how often each word occurs in *lines*.

    Every line is upper-cased (so counting is case-insensitive), each
    character listed in ``dropChars`` is replaced by a space, and the result
    is split on whitespace.

    Args:
        lines: An iterable of strings, for example an open text file.

    Returns:
        A ``collections.Counter`` mapping each upper-cased word to the number
        of times it was seen.
    """
    table = Counter()
    for line in lines:
        table.update(line.upper().translate(dropTable).split())
    return table


def main():
    """Prompt for a text file path and print its words, most frequent first."""
    # The encoding is stated explicitly so decoding does not depend on the
    # platform's locale default.
    with open(input("Paste file path here: "), 'r', encoding="utf-8") as file:
        table = count_words(file)
    # most_common() sorts by descending count; words with equal counts stay in
    # the order they were first encountered.
    for word, count in table.most_common():
        print(f"{word} : {count}")


if __name__ == "__main__":
    main()
答案 0 :(得分:0)
出于某种原因,我认为这段代码应该可以正常工作,但它却不起作用……
from collections import Counter

# Punctuation, symbols and digits that must never be counted as part of a word.
dropChars = "*!@#$%ˆ&()½_+-={}[]|\\:;\"’<>,.?/1234567890"
# Each dropped character becomes a space, so it also acts as a word separator.
dropDict = {c: ' ' for c in dropChars}
dropTable = str.maketrans(dropDict)

# Reference word list that main() reads line by line.
ENGLISH_WORDS_PATH = "/Users/MaxEllis/Desktop/Griffith/Repo/english_words.txt"

# Lower-cased vocabulary consulted by is_english_word(); filled in by main().
e_words = set()


def load_english_words(path):
    """Read the word list at *path* into a set of lower-cased words.

    Args:
        path: Path of a text file; each line is stripped and lower-cased.

    Returns:
        A set containing the stripped, lower-cased content of every line.
    """
    # Read-only access is enough, and the context manager closes the file even
    # if reading fails.
    with open(path, "r", encoding="utf-8") as english_words:
        return {word.strip().lower() for word in english_words}


def is_english_word(word):
    """Return True if *word*, compared case-insensitively, is in ``e_words``."""
    return word.lower() in e_words


def count_english_words(lines):
    """Count how often each English word occurs in *lines*.

    Every line is upper-cased, each character listed in ``dropChars`` is
    replaced by a space, and the result is split on whitespace. Words rejected
    by ``is_english_word`` are skipped.

    Args:
        lines: An iterable of strings, for example an open text file.

    Returns:
        A ``collections.Counter`` mapping each accepted upper-cased word to the
        number of times it was seen.
    """
    table = Counter()
    for line in lines:
        words = line.upper().translate(dropTable).split()
        # Apply the vocabulary filter here so rejected words never reach the
        # table.
        table.update(word for word in words if is_english_word(word))
    return table


def main():
    """Load the word list, prompt for a text file and print English word counts."""
    e_words.update(load_english_words(ENGLISH_WORDS_PATH))
    with open(input("Paste file path here: "), 'r', encoding="utf-8") as file:
        table = count_english_words(file)
    # most_common() sorts by descending count; words with equal counts stay in
    # the order they were first encountered.
    for word, count in table.most_common():
        print(f"{word} : {count}")


if __name__ == "__main__":
    main()