我有以下代码，用于将目录中的所有文本文件拼接成一个文件。即使我使用 `join` 来进行字符串拼接，拼接速度仍然变得越来越慢（处理了 14000 个文件之后，需要 60 秒，而不是 3 秒）。我做错了什么？
# -*- coding: utf-8 -*-
import os
from datetime import datetime
# Concatenate every file in the input directory into one string, `af`,
# prefixing each file's content with a "== <filename> ==" header line.
t1 = datetime.now()
directory_in_str = "E:\\Downloads\\WikipediaAF\\Extracted\\"
directory = os.fsencode(directory_in_str)

# NOTE(review): the counter starts at 1 and is incremented before use, so the
# number printed is one ahead of the files fully processed; the total below
# is hard-coded. Both are kept as-is so the progress output does not change.
c = 1

# Collect the pieces in a list and join once at the end. Re-joining the
# accumulated string on every iteration copies everything read so far each
# time, which makes the loop quadratic in the total amount of text.
parts = []
for file in os.listdir(directory):
    c = c + 1
    if c % 1000 == 0:
        # Progress report every 1000 directory entries.
        t2 = datetime.now()
        print('Time now: ' + str(t2 - t1))
        print(str(c) + ' out of 67062')
    # os.listdir() was given bytes, so it yields bytes names; decode to str.
    filename = os.fsdecode(file)
    with open(os.path.join(directory_in_str, filename), encoding="utf8") as f_in:
        parts.append('== ' + filename + ' ==\n')
        # Replace the straight apostrophe in " 'n " with a typographic one.
        parts.append(f_in.read().replace(" 'n ", " ’n "))

# Single linear-time concatenation of everything that was read.
af = ''.join(parts)
答案 0 :(得分:0)
# -*- coding: utf-8 -*-
import os
from datetime import datetime
from collections import deque
# Concatenate every file in the input directory into a single output file,
# prefixing each file's content with a "== <filename> ==" header line.
t1 = datetime.now()
directory_in_str = "E:\\Downloads\\WikipediaAF\\Extracted\\"
directory = os.fsencode(directory_in_str)

# The result is written into the same directory that is being scanned, so the
# output file must be excluded from the inputs; otherwise a second run would
# read the previous output and duplicate all of its content.
output_name = 'af_out2.txt'
output_key = os.path.normcase(output_name)

# NOTE(review): the counter starts at 1 and is incremented before use, so the
# number printed is one ahead of the files fully processed; the total below
# is hard-coded. Both are kept as-is so the progress output does not change.
c = 1

# Pieces are accumulated in a list and joined once after the loop, which is
# linear in the total text size. Only appends at the right end are needed, so
# a plain list is sufficient.
af = []
for file in os.listdir(directory):
    c = c + 1
    if c % 1000 == 0:
        # Progress report every 1000 directory entries.
        t2 = datetime.now()
        print('Time now: ' + str(t2 - t1))
        print(str(c) + ' out of 67062')
    # os.listdir() was given bytes, so it yields bytes names; decode to str.
    filename = os.fsdecode(file)
    if os.path.normcase(filename) == output_key:
        # Skip the output of a previous run.
        continue
    with open(os.path.join(directory_in_str, filename), encoding="utf8") as f_in:
        af.append('== ')
        af.append(filename)
        af.append(' ==\n')
        # Replace the straight apostrophe in " 'n " with a typographic one.
        af.append(f_in.read().replace(" 'n ", " ’n "))

t2 = datetime.now()
print('After read af: ' + str(t2 - t1))

# Single concatenation of everything that was read.
af = ''.join(af)
t2 = datetime.now()
print('After join af: ' + str(t2 - t1))

with open(os.path.join(directory_in_str, output_name), 'w', encoding='utf-8') as f_out:
    f_out.write(af)