我有5个不同的文本文件:1500.txt、1600.txt、1700.txt、1800.txt和1900.txt。我从1800.txt中选出最常用的单词,并将其与其他文本文件进行比较,以确定这些单词在其他文本文件中出现的次数。我的问题是:我可以把结果打印出来,但我想把结果写入csv文件。我的代码如下:
import sys, string
import codecs
import re
from collections import Counter
import collections
import itertools
import csv
import re
import unicodedata
# Compiled once and shared by every file scan instead of being looked up
# again for each line of each file.
_WORD_PATTERN = re.compile(r'\w+')

# Words must be strictly longer than three characters to be counted.
_MIN_WORD_LENGTH = 4


def _count_long_words(path, encoding='ISO-8859-1'):
    """Return a Counter of the lower-cased words found in the file at *path*.

    The file is read line by line; each line is lower-cased and split into
    runs of word characters (``\\w+``). Only words of at least
    ``_MIN_WORD_LENGTH`` characters are counted.

    Args:
        path: Location of the text file to scan.
        encoding: Text encoding used to decode the file.

    Returns:
        A ``collections.Counter`` mapping each qualifying word to the number
        of times it occurs in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    counts = Counter()
    with open(path, "r", encoding=encoding) as text_file:
        for line in text_file:
            counts.update(
                word
                for word in _WORD_PATTERN.findall(line.lower())
                if len(word) >= _MIN_WORD_LENGTH
            )
    return counts


# One word-frequency table per source text, built in the same order as the
# original script (1800 first, because it is the reference text).
common_words_1800 = _count_long_words('E:\\Book\\1800.txt')
common_words_1900 = _count_long_words('E:\\Book\\1900.txt')
common_words_1700 = _count_long_words('E:\\Book\\1700.txt')
common_words_1600 = _count_long_words('E:\\Book\\1600.txt')
common_words_1500 = _count_long_words('E:\\Book\\1500.txt')
# For each of the 50 most frequent words in 1800.txt, print one tab-separated
# row: the word, its count in 1800.txt, then its counts in 1900.txt, 1700.txt,
# 1600.txt and 1500.txt.
#
# A Counter returns 0 for a key it has never seen instead of raising KeyError,
# so the lookups below need no exception handling; a word that is absent from
# one of the other files is simply reported with a count of 0.
for word, count in common_words_1800.most_common(50):
    count_in_file2 = common_words_1900[word]
    count_in_file3 = common_words_1700[word]
    count_in_file4 = common_words_1600[word]
    count_in_file5 = common_words_1500[word]
    print("{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(word, count, count_in_file2, count_in_file3, count_in_file4, count_in_file5))