def split_file(filepath, lines=30):
    """Split a text file into consecutive parts of at most ``lines`` lines.

    Each part is written to the directory of the input file and named
    ``<basename>_<n><ext>``, where ``<n>`` is the zero-based index of the
    first input line stored in that part (``0``, ``lines``, ``2 * lines``,
    ...). Existing files with those names are overwritten. An empty input
    still produces an empty ``<basename>_0<ext>``.

    Args:
        filepath: Path of the text file to split.
        lines: Maximum number of lines per part; must be at least 1.

    Raises:
        ValueError: If ``lines`` is smaller than 1.
        OSError: If the input cannot be read or a part cannot be written.
    """
    if lines < 1:
        # Fail before touching the file system; lines == 0 would otherwise
        # divide by zero after an empty first part had been created.
        raise ValueError('lines must be at least 1, got {!r}'.format(lines))

    path, filename = os.path.split(filepath)
    # str.split('.') would break on file names containing more than one dot.
    basename, ext = os.path.splitext(filename)

    def part_path(first_line):
        # Parts are numbered by the index of their first line.
        return os.path.join(path, '{}_{}{}'.format(basename, first_line, ext))

    with open(filepath, 'r') as f_in:
        # Opened outside the try so that a failure here is reported as-is
        # instead of being masked by a NameError in the finally clause.
        f_out = open(part_path(0), 'w')
        try:
            for i, line in enumerate(f_in):
                # Roll over at every multiple of `lines`, except at line 0,
                # whose part is already open.
                if i and i % lines == 0:
                    f_out.close()
                    f_out = open(part_path(i), 'w')
                f_out.write(line)
        finally:
            # Closing an already closed file is a no-op, so this is safe
            # even when opening the next part failed.
            f_out.close()
然而,它只对较小的 txt 文件有效,对我的目标文件却不起作用,而且没有任何错误信息,我不知道为什么。
答案 0 :(得分:1)
这应该有效。它有点迂回,但应该在人类可读的同时规避你的神秘错误。
首先让我们定义一些有用的函数。第一个函数读取文件并把每一行作为列表的一个元素,第二个函数把列表写入文件。
注意,如果不存在具有该名称的文件,则第二个函数将创建一个新文件,如果存在,则覆盖该文件。
def line_reader(target_file):
    """Read a text file and return its lines as a list.

    Args:
        target_file: Path of the file to read.

    Returns:
        A list with one string per line, in file order. Line endings are
        kept, so a final line without a newline is returned without one.
    """
    with open(target_file, 'r') as source:
        return source.readlines()
def line_writer(file_name, store):
    """Write a sequence of strings to a file, replacing any existing file.

    The strings are written back to back; no line separators are added, so
    each element must already carry its own newline where one is wanted.

    Args:
        file_name: Path of the file to create or overwrite.
        store: Iterable of strings to write, in order.
    """
    with open(file_name, 'w') as sink:
        sink.writelines(store)
接下来,让我们定义真正把文件拆分成较小文件的函数。
def breakdown(target, new_file_name, chunk_length=10):
    """Split a text file into numbered parts of ``chunk_length`` lines each.

    The lines of ``target`` are read with ``line_reader`` and written with
    ``line_writer`` to files named ``new_file_name + ' ' + str(part_no)``,
    where ``part_no`` starts at 1. The final part holds whatever lines are
    left over, followed by one extra ``'\\n'``. When the line count is an
    exact multiple of ``chunk_length`` (including an empty input), the final
    part therefore contains only that newline.

    Args:
        target: Path of the file to split.
        new_file_name: Prefix used to build the name of every part.
        chunk_length: Number of lines per part; must be at least 1.

    Raises:
        ValueError: If ``chunk_length`` is smaller than 1.
    """
    if chunk_length < 1:
        # A non-positive length would never consume any line and would keep
        # writing empty parts forever.
        raise ValueError(
            'chunk_length must be at least 1, got {!r}'.format(chunk_length))

    data = line_reader(target)
    # part_no is solely for naming purposes.
    part_no = 0
    start = 0
    while True:
        # Slicing keeps the whole split linear; popping from the front of
        # the list shifts every remaining element on each call.
        chunk = data[start:start + chunk_length]
        start += chunk_length
        # A short (possibly empty) slice means the input is exhausted.
        is_last = len(chunk) < chunk_length
        if is_last:
            chunk.append('\n')
        part_no += 1
        line_writer(new_file_name + ' ' + str(part_no), chunk)
        if is_last:
            break
调用 breakdown 会将目标文件拆分为若干个各含 chunk_length 行的较小文件(默认为 10 行)。最后一个文件只包含原始文件中剩余的内容,并在末尾附加一个空行。