Question

我有以下代码，用于把目录中的所有文本文件合并到一个文件中。即使我使用 join 来拼接字符串，拼接速度也会越来越慢（处理 14000 个文件之后，耗时从 3 秒变成了 60 秒）。我做错了什么？

# -*- coding: utf-8 -*-
import os
from datetime import datetime

# Read every entry of one directory and accumulate the contents, each preceded
# by a "== <filename> ==" header line, in a single in-memory string `af`.
# Elapsed time is printed each time the counter reaches a multiple of 1000.
t1 = datetime.now()

directory_in_str = "E:\\Downloads\\WikipediaAF\\Extracted\\"
# Passing a bytes path makes os.listdir() return bytes names; they are decoded
# back to str with os.fsdecode() inside the loop.
directory = os.fsencode(directory_in_str)

# c starts at 1 and is incremented before the check below, so it is always one
# more than the number of entries reached so far.
c = 1
af = ''
for file in os.listdir(directory):
    c = c + 1
    if c % 1000 == 0:
        t2 = datetime.now()
        print('Time now: ' + str(t2 - t1))
        print(str(c) + ' out of 67062')
    #    break
    filename = os.fsdecode(file)
    with open(os.path.join(directory_in_str, filename), encoding="utf8") as f_in:
        # `af` is itself one of the joined parts, so each iteration builds a
        # brand-new string containing everything accumulated so far plus the
        # new file. The work per iteration grows with the size of `af`, which
        # makes the loop quadratic overall even though str.join is used.
        # The replace() swaps the straight apostrophe in " 'n " for a
        # typographic one (’).
        af = ''.join([af, '== ', filename, ' ==\n', f_in.read().replace(" 'n ", " ’n ")])

Answer 1

# -*- coding: utf-8 -*-
import os
from datetime import datetime
from collections import deque

# Concatenate every file in one directory into a single output file, each
# preceded by a "== <filename> ==" header line, printing elapsed time along
# the way.
t1 = datetime.now()

directory_in_str = "E:\\Downloads\\WikipediaAF\\Extracted\\"
output_name = 'af_out2.txt'

# The result is written into the same directory that is being read, so leave
# it out of the listing; otherwise a second run would fold the previous output
# back into the new one. normcase() keeps the comparison correct on
# case-insensitive file systems.
# os.listdir() on a str path already returns str names, so no
# fsencode()/fsdecode() round trip is needed.
filenames = [
    name for name in os.listdir(directory_in_str)
    if os.path.normcase(name) != os.path.normcase(output_name)
]
total = len(filenames)

# Collect the pieces and join them exactly once after the loop. Re-joining the
# growing result on every iteration copies everything accumulated so far each
# time, which is what makes the naive version slow down as it progresses.
af = deque()
for c, filename in enumerate(filenames, start=1):
    # c is the 1-based index of the file being processed.
    if c % 1000 == 0:
        t2 = datetime.now()
        print('Time now: ' + str(t2 - t1))
        print(str(c) + ' out of ' + str(total))
    with open(os.path.join(directory_in_str, filename), encoding="utf8") as f_in:
        af.append('== ')
        af.append(filename)
        af.append(' ==\n')
        # Swap the straight apostrophe in " 'n " for a typographic one (’).
        af.append(f_in.read().replace(" 'n ", " ’n "))

t2 = datetime.now()
print('After read af: ' + str(t2 - t1))

af = ''.join(af)

t2 = datetime.now()
print('After join af: ' + str(t2 - t1))

with open(os.path.join(directory_in_str, output_name), 'w', encoding='utf-8') as f_out:
    f_out.write(af)

使用join时Python字符串连接速度很慢

1 个答案: