我需要找出文本中出现次数最多的 30 个单词，请帮帮我。我也尝试过使用计数器（Counter），但没有成功。
import re
import string
from collections import Counter
# Only lowercase ASCII words of 4 to 20 letters are counted.
WORD_PATTERN = re.compile(r'\b[a-z]{4,20}\b')


def count_words(teks):
    """Return a Counter mapping each matching word in *teks* to its frequency.

    The text is lowercased before matching, so counting is case-insensitive.
    """
    return Counter(WORD_PATTERN.findall(teks.lower()))


def most_common_words(teks, limit=30):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Words with equal counts are ordered alphabetically so the result is
    deterministic. Passing ``limit=None`` returns every counted word.
    """
    frekuensi = count_words(teks)
    # Sort by descending count, then by word to break ties predictably.
    ranked = sorted(frekuensi.items(), key=lambda pair: (-pair[1], pair[0]))
    return ranked[:limit]


def main(path='doc-1.txt', limit=30):
    """Print the *limit* most frequent words of the file at *path*.

    Each output line has the form ``word count``.
    """
    # The context manager closes the file even if reading raises.
    with open(path, 'r') as doc:
        teks = doc.read()
    for kata, hitung in most_common_words(teks, limit):
        # A single-argument print() call behaves the same on Python 2 and 3.
        print('%s %d' % (kata, hitung))


if __name__ == '__main__':
    main()