I want to loop over a number of files and count the words that start with a given prefix, using word.startswith(), in Python 3.x. There are three files: file1.txt, file2.txt and file3.txt.
Each one contains a single word, "for" (this is only for the sake of demonstration). At the end of execution I get the output "for 1", as if the word occurred only once. The output should be "for 3".
I am sure there is something wrong with how I define match and reset it at the end of the if condition, but I can't figure out how to fix it. Below is the code:
import glob
from collections import Counter
import csv
# Module-level accumulator of [locative, count] pairs
record = []

# Raw data: every .txt file in the current working directory.
# NOTE: this pattern also matches loctives_file.txt when it lives there too.
list_of_files = glob.glob('*.txt')

# Locative words (i.e. prepositions), one per line, surrounding whitespace removed
with open("loctives_file.txt", 'r', encoding='utf-8') as f:
    locatives = [entry.strip() for entry in f]
# Scan the given files for matching words, record [locative, count] pairs in
# the module-level `record` list, and append the resulting counts to
# freeCounts.xlsx (written with csv.writer, so the content is comma-separated).
# NOTE(review): the indentation of this block was lost in this copy, so the
# exact nesting of the statements below cannot be confirmed from here.
def locatives_frequency(list_of_files, locatives):
for file in list_of_files:
# The handle returned by open() is never closed explicitly.
text = open(file, 'r', encoding='utf-8').read()
# Whitespace-separated tokens of the file
lst = text.strip().split()
del text
# List that collects the matching words
match = []
for i in range(len(lst)):
for locative in locatives:
# The prefix tested here is the literal 'f', not the loop variable `locative`.
if lst[i].startswith('f'):
match.append(lst[i])
# One [locative, count] pair is appended per call; pairs are never summed here.
record.append([locative, len(match)])
# Rebinding `match` to a new empty list discards the matches gathered so far.
match = []
# Now count the final results
records = [[k, int(v)] for k, v in record]
# dict() keeps a single value per key, so when `record` holds several pairs
# for the same locative only the last pair's count survives.
results = Counter(dict(records))
# Write the frequencies to the .xlsx-named file.
# The file is opened in append mode, so the header row is added on every run.
with open("freeCounts.xlsx", 'a', newline='', encoding='utf-8') as workbook:
# From here on `locatives` names the csv writer, shadowing the parameter.
locatives = csv.writer(workbook, delimiter=',')
locatives.writerow(["Locative", "Free Count"])
for k,v in results.items():
# The output file is reopened in append mode for this row.
with open("freeCounts.xlsx", 'a', newline='', encoding='utf-8') as workbook:
locatives = csv.writer(workbook, delimiter=',')
data = [k, v]
locatives.writerow(data)
print("Finished!")
# Run the count over the globbed files using the loaded locatives
locatives_frequency(list_of_files, locatives)
答案 0 :(得分:1)
我认为你应该直接使用 collections.Counter,而不是先创建一个 match 列表,再把内容添加到 record 列表等等。原来的做法在很多方面都有问题。
示例 -
def locatives_frequency(list_of_files, locatives):
    """Count words starting with each locative and append the counts as CSV.

    Every file in *list_of_files* is read as UTF-8 and split on whitespace.
    Each word adds one to the tally of every entry in *locatives* that it
    starts with; tallies are summed across all files.  A header row followed
    by one ``[locative, count]`` row per matched locative is then appended to
    ``freeCounts.xlsx`` in the current working directory.

    Args:
        list_of_files: Paths of the text files to scan.
        locatives: Prefixes to look for at the start of each word.

    Returns:
        None.  The counts are written to ``freeCounts.xlsx`` and
        ``Finished!`` is printed when done.
    """
    results = Counter()
    for file in list_of_files:
        with open(file, 'r', encoding='utf-8') as f:
            # split() without arguments already ignores leading/trailing whitespace
            words = f.read().split()
        for word in words:
            for locative in locatives:
                if word.startswith(locative):
                    results[locative] += 1

    # The file is produced by csv.writer, so despite its .xlsx name the
    # content is comma-separated text.  newline='' lets the csv module control
    # line endings itself (otherwise blank rows appear on Windows).  The file
    # is opened once for the header and all rows instead of once per row, and
    # the writer gets its own name so the `locatives` argument is not shadowed.
    with open("freeCounts.xlsx", 'a', newline='', encoding='utf-8') as workbook:
        writer = csv.writer(workbook, delimiter=',')
        writer.writerow(["Locative", "Free Count"])
        writer.writerows(results.items())
    print("Finished!")