import os
import re
import sys
# Rebind sys.stdout to f1.txt (opened for writing) so that every later
# print() in this script writes into that file instead of the console.
sys.stdout=open('f1.txt','w')
from collections import Counter
from glob import glob
def removegarbage(text):
    """Normalize *text* so it can be split into countable words.

    Every run of characters matched by the regex ``\\W`` (non-word
    characters such as punctuation and whitespace) is collapsed into a
    single space, and the result is lower-cased.

    Args:
        text: The raw string to clean up.

    Returns:
        The cleaned, lower-cased string. Leading/trailing runs of
        non-word characters become a single space rather than being
        stripped, so callers typically follow up with ``.split()``.
    """
    text = re.sub(r'\W+', ' ', text)
    return text.lower()
folderpath='d:/induvidual-articles'
counter=Counter()

# The number of .txt files found in the folder is the denominator of the
# per-word ratio printed below.
filepaths = glob(os.path.join(folderpath,'*.txt'))
num_files = len(filepaths)

# Count every normalized word that occurs in topics.txt.
with open('topics.txt','r') as filehandle:
    lines = filehandle.read()
words = removegarbage(lines).split()
counter.update(words)

if num_files:
    for word, count in counter.most_common():
        # True division (float() keeps this correct on Python 2 as well);
        # floor division would truncate the ratio to an integer.
        probability = count / float(num_files)
        print('{} {} {}'.format(word,count,probability))
else:
    # glob() matched nothing (wrong path, or no .txt files in the folder),
    # so dividing by num_files would raise ZeroDivisionError. Report on
    # stderr so the message is not mixed into the regular stdout output.
    sys.stderr.write(
        'No .txt files found in {!r}; cannot compute probabilities.\n'.format(folderpath)
    )
我遇到了 ZeroDivisionError: float division by zero(浮点数除以零)错误,出错的行是 probability=count//num_files
我该如何纠正它?
我需要输出格式为: 单词,计数,概率
请帮帮我!
答案 0 :(得分:8)
您的 num_files 变量为 0。
请检查 folderpath='d:/induvidual-articles' 是否正确(induvidual 拼写有误,不过实际的目录名也可能同样是这样拼错的)。
答案 1 :(得分:1)
检查路径是否存在。如果存在,请检查该目录中是否至少包含一个 .txt 文件。然后把整个 for 循环放进一个 if 块中:
# Only run the per-word loop when at least one file was found; otherwise the
# division by num_files inside the loop would raise ZeroDivisionError.
if num_files:
    for word, count in counter.most_common():
        ...  # placeholder: the loop body stays as it was
else:
    print("No text files found!")