我正在尝试为一个可能很大的文件夹创建 zip 存档。为此我使用了 Python 的 zipfile 模块,但据我所知,它没有提供把生成的存档按最大大小拆分成多个分卷的选项。
压缩后的存档需要通过 Telegram 发送,而 Telegram 对每个文件有 1.5 GB 的大小限制。因此,我需要拆分生成的 zip 存档。
我真的不希望使用子进程和shell命令来创建此存档。
我当前的代码如下:
def create_zip(archive_name, directory):
    """Create an LZMA-compressed zip archive from the files under a directory.

    The directory tree is walked recursively and every regular file found is
    stored under its path relative to ``directory``. Only files are added;
    directories themselves get no entry of their own.

    Args:
        archive_name: Path of the zip file to write (opened in ``"w"`` mode).
        directory: Root folder whose files are archived. A trailing path
            separator is accepted.

    Returns:
        The ``ZipFile`` object that was written. The ``with`` block has
        already closed it by the time it is returned.
    """
    with ZipFile(archive_name, "w", ZIP_LZMA) as target_zip_file:
        for root, _, files in os.walk(directory):
            for file_to_zip in files:
                absolute_path = os.path.join(root, file_to_zip)
                # Slicing off len(directory) + len(os.sep) characters drops
                # the first character of every name when ``directory`` already
                # ends with a separator; relpath is correct in both cases.
                zip_file_name = os.path.relpath(absolute_path, directory)
                target_zip_file.write(absolute_path, zip_file_name)
    return target_zip_file
预先感谢
答案 0 :(得分:1)
如果在 zipfile 中找不到更好的原生方式,您仍然可以自己编写文件拆分算法,例如:
# Split the finished archive into numbered parts of at most ``packet_size``
# bytes each: "<archive>000", "<archive>001", ...
outfile = archive_name
packet_size = int(1.5 * 1024**3)  # maximum size of one part, in bytes
buffer_size = 16 * 1024**2  # bytes held in memory at once while copying

with open(outfile, "rb") as source:
    filecount = 0
    # Copy through a small buffer instead of reading a whole part (1.5 GiB)
    # into memory with a single read() call.
    chunk = source.read(min(buffer_size, packet_size))
    while chunk:
        with open("{}{:03}".format(outfile, filecount), "wb") as packet:
            written = 0
            while chunk:
                packet.write(chunk)
                written += len(chunk)
                if written >= packet_size:
                    # This part is full. Prefetch the next chunk so that an
                    # empty trailing part is never created.
                    chunk = source.read(min(buffer_size, packet_size))
                    break
                chunk = source.read(min(buffer_size, packet_size - written))
        filecount += 1
在接收端可以用类似的方法把这些分块重新合并成原文件。
答案 1 :(得分:0)
这是我用来通过 Telegram bot 把文件发送到 Telegram 频道的工具。Telegram bot 上传文件的大小限制为 50 MB。
#! /usr/bin/python3
# -*- coding:utf-8 -*-
# apt-get install p7zip-full
import subprocess
import os
import math
import logzero
# Default maximum size of one part, in megabytes: it is passed to 7z as
# "-v<size>m" and compared against file sizes computed as bytes / 2**20.
SPLIT_SIZE = 49
def file_split_7z(filePath, split_size=SPLIT_SIZE, removeOrigin=False):
    """Pack a file into a multi-volume 7z archive using the ``7z`` command.

    The volumes are written next to the input as ``<stem>.7z.001``,
    ``<stem>.7z.002``, ... where ``<stem>`` is the input file name without its
    last extension and with any remaining dots replaced by underscores. The
    archive is created with ``-mx0`` (compression level 0).

    Args:
        filePath: Path of the file to split.
        split_size: Maximum size of one volume, in megabytes (passed to 7z as
            ``-v<split_size>m``).
        removeOrigin: If true, delete the input file after 7z has run.

    Returns:
        A list of absolute paths of the volumes, in order. The exit status of
        7z is not checked, so the listed files are only guaranteed to exist
        when 7z succeeded.

    Raises:
        OSError: If the file cannot be accessed or renamed, or if the ``7z``
            executable cannot be started.
    """
    originFilePath = ''
    if os.path.splitext(filePath)[1] == '.7z':
        # The output volumes are named "<stem>.7z.NNN"; temporarily give an
        # input that already ends in ".7z" a different extension while 7z runs.
        originFilePath = filePath
        filePath = os.path.splitext(originFilePath)[0] + '.7zo'
        os.rename(originFilePath, filePath)
    try:
        size_mb = os.path.getsize(filePath) / 1024 / 1024
        estimated_parts = math.ceil(size_mb / split_size)
        # Replace dots in the file name only; doing it on the whole path would
        # also rewrite directory names such as "./" or "/home/user.name/".
        directory, stem = os.path.split(os.path.splitext(filePath)[0])
        fn = os.path.join(directory, stem.replace('.', '_'))
        # Pass an argument list instead of a shell string so that quotes,
        # spaces or shell metacharacters in the paths cannot break or inject
        # into the command.
        subprocess.call(
            ['7z', 'a', '-v{}m'.format(split_size), '-y', fn, filePath, '-mx0']
        )
    finally:
        # Always give the input its original name back, even if 7z could not
        # be started.
        if originFilePath:
            os.rename(filePath, originFilePath)
            filePath = originFilePath
    file_list = [
        '{}.7z.{:03d}'.format(fn, index)
        for index in range(1, estimated_parts + 1)
    ]
    # The estimate ignores the archive's own headers, which can push the data
    # into additional volumes; include any that were actually produced.
    extra_index = estimated_parts + 1
    while os.path.exists('{}.7z.{:03d}'.format(fn, extra_index)):
        file_list.append('{}.7z.{:03d}'.format(fn, extra_index))
        extra_index += 1
    if removeOrigin:
        os.remove(filePath)
    return [os.path.abspath(path) for path in file_list]
def split7zFile(filePath, logger=logzero.logger):
    """Split a file into 7z volumes when it is larger than ``SPLIT_SIZE``.

    Files whose size (bytes / 2**20) does not exceed ``SPLIT_SIZE`` are left
    untouched. Larger files are divided into the smallest number of parts
    that each fit within ``SPLIT_SIZE``, with the per-part size evened out
    across that number of parts.

    Args:
        filePath: Path of the file to examine.
        logger: Logger that receives one info line describing the split.

    Returns:
        ``[filePath]`` when no split is needed, otherwise the list returned
        by ``file_split_7z``.
    """
    size_in_mb = os.path.getsize(filePath) / 2**20

    # Guard clause: small enough to be sent as-is.
    if size_in_mb <= SPLIT_SIZE:
        return [filePath]

    part_count = math.ceil(size_in_mb / SPLIT_SIZE)
    # Spread the data evenly over the parts rather than filling each one up
    # to SPLIT_SIZE and leaving a small remainder.
    size_per_part = math.ceil(size_in_mb / part_count)
    message = 'file size | {} | split num | {} |split size | {}'.format(
        size_in_mb, part_count, size_per_part
    )
    logger.info(message)
    return file_split_7z(filePath, split_size=size_per_part, removeOrigin=False)