我编写了一个代码,用于从两个文件中提取所有单词,并且只返回两个文件中的单词。 但是,我做了一些重复,这不是一个好的风格,所以我想知道是否可以用我的代码避免这个?
import re
def print_common_words(filename_1, filename_2):
try:
input_file = open(filename_1, 'r')
source_string = input_file.read().lower()
input_file.close()
all_words1 = set(re.findall('[a-zA-Z]+', source_string))
input_file = open(filename_2, 'r') #Repetition
source_string = input_file.read().lower() #Repetition
input_file.close() #Repetition
all_words2 = set(re.findall('[a-zA-Z]+', source_string)) #Repetition
intersection_list = all_words1.intersection(all_words2)
union_list = []
for word in intersection_list:
union_list += [word]
union_list.sort()
for i in union_list:
print(i)
except FileNotFoundError:
print("A file could not be found.")
答案 0 :(得分:2)
使用方法分解出重复的代码。
def get_file(file):
input_file = open(file, 'r')
source_string = input_file.read().lower()
input_file.close()
return set(re.findall('[a-zA-Z]+', source_string))
称之为:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
例如:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
intersection_list = all_words1.intersection(all_words2)
union_list = []
for word in intersection_list:
union_list += [word]
union_list.sort()
for i in union_list:
print(i)