运行以下代码时遇到错误。
我想删除停用词,但是不起作用!
def cut_txt(old_file):
from string import punctuation
import jieba
jieba.load_userdict("user_dictionary.csv")
stopwords = [line.strip().decode('utf-8') for line in open('stop_words.txt').readlines() ]
global cut_file # 分词之后保存的文件名
cut_file = old_file + '_cut.txt'
try:
fi = open(old_file, 'r', encoding='utf-8')
except BaseException as e: # 因BaseException是所有错误的基类,用它可以获得所有错误类型
print(Exception, ":", e) # 追踪错误详细信息
text = fi.read() # 获取文本内容
new_text = jieba.cut(text, cut_all=False) # 精确模式
str_out = ' '.join(new_text).replace(',', '').replace('。', '').replace('?', '').replace('!', '') \
.replace('“', '').replace('”', '').replace(':', '').replace('…', '').replace('(', '').replace(')', '') \
.replace('—', '').replace('《', '').replace('》', '').replace('、', '').replace('‘', '') \
.replace('’', '').replace(',', '').replace('【', '').replace('】', '').replace('"', '')\
.replace('#','').replace('...','').replace('?','').replace('『','') # 去掉标点符号
#去除停用词
final = ''
for seg in str_out:
seg = seg.encode('gbk')
if seg not in stopwords:
final += seg
fo = open(cut_file, 'w', encoding='utf-8')
fo.write(final)
cut_txt('你的天空')