我在使用python解压缩文件时遇到了问题
背景:我有大量文件需要上传到云端(crestle.ai)的 Jupyter 笔记本中,手动逐个上传不切实际。因此我把整个文件夹压缩成一个 zip 文件,上传到我的个人 Google 云端硬盘并得到了一个可共享的链接,然后在 Jupyter 笔记本的工作区中使用 wget 通过该链接下载这个 zip 文件。
问题
我尝试解压缩我上传的文件时遇到了 BadZipFile 错误。我参考了几个相关的帖子,但其中没有一种解决方案对我有效。
import zipfile

# Unpack every member of the downloaded archive into the "title" directory.
# ZipFile raises BadZipFile at this point when the file is not a valid zip.
with zipfile.ZipFile("Title.zip", mode="r") as archive:
    archive.extractall(path="title")
BadZipFile Traceback (most recent call last)
<ipython-input-14-552046070dd2> in <module>
1 import zipfile
----> 2 with zipfile.ZipFile("Title.zip","r") as zip_ref:
3 zip_ref.extractall("title")
/opt/conda/lib/python3.6/zipfile.py in __init__(self, file, mode, compression, allowZip64)
1129 try:
1130 if mode == 'r':
-> 1131 self._RealGetContents()
1132 elif mode in ('w', 'x'):
1133 # set the modified flag so central directory gets written
/opt/conda/lib/python3.6/zipfile.py in _RealGetContents(self)
1196 raise BadZipFile("File is not a zip file")
1197 if not endrec:
-> 1198 raise BadZipFile("File is not a zip file")
1199 if self.debug > 1:
1200 print(endrec)
BadZipFile: File is not a zip file
所以我用谷歌搜索并找到了两个可能的解决方案
def fixBadZipfile(zipFile):
    """Truncate any bytes that follow the ZIP end-of-central-directory record.

    The file is opened in place, searched for the first end-of-central-
    directory signature and cut off 22 bytes after it (the size of that
    record when it carries no archive comment).  If the signature is not
    present the file is left untouched and a notice is printed.

    Args:
        zipFile: Path of the archive to repair in place.
    """
    # The context manager closes the handle on every path, including the
    # branch where no signature is found.
    with open(zipFile, 'r+b') as f:
        data = f.read()
        pos = data.find(b'\x50\x4b\x05\x06')  # End of central directory signature
        # find() returns -1 when the signature is absent; offset 0 is a valid
        # hit (an empty archive consists of nothing but this record).
        if pos >= 0:
            # This is a plain function with no ``self``, so report through
            # print(), the same channel the not-found branch uses.
            print("Trancating file at location " + str(pos + 22) + ".")
            f.seek(pos + 22)  # size of 'ZIP end of central directory record'
            f.truncate()
        else:
            print("trucatedfile")
以上代码对我的情况没有帮助
import mmap
from io import UnsupportedOperation
from zipfile import BadZipfile
# Signature bytes that open the ZIP "end of central directory" record.
CENTRAL_DIRECTORY_SIGNATURE = b'\x50\x4b\x05\x06'


def repair_central_directory(zipFile):
    """Cut off whatever follows the end-of-central-directory record.

    Args:
        zipFile: Either a path to open in ``'rb+'`` mode, or an already open
            binary file-like object (anything exposing ``read``).

    Returns:
        The file object, truncated 22 bytes after the first signature match
        and rewound to offset 0.  When a path was given, the caller owns the
        returned handle and must close it.

    Raises:
        BadZipfile: If the signature does not occur anywhere in the file
            (this includes an empty file).
    """
    import os

    opened_here = not hasattr(zipFile, 'read')
    # Otherwise, open the file with binary mode
    f = open(zipFile, 'rb+') if opened_here else zipFile
    try:
        try:
            fileno = f.fileno()
        except UnsupportedOperation:
            # This is an io.BytesIO instance which lacks a backing file.
            fileno = None

        if fileno is None:
            # Without a fileno, we can only read and search the whole string
            # for the end of central directory signature.
            f.seek(0)
            pos = f.read().find(CENTRAL_DIRECTORY_SIGNATURE)
        elif os.fstat(fileno).st_size == 0:
            # mmap refuses to map an empty file (ValueError); an empty file
            # simply has no signature.
            pos = -1
        else:
            # Instead of reading the entire file into memory, memory-map the
            # file, then search it for the end of central directory signature.
            # Reference: https://stackoverflow.com/a/21844624/2293304
            # The map is only searched, so a read-only mapping is sufficient,
            # and the context manager unmaps it even if find() fails.
            with mmap.mmap(fileno, 0, access=mmap.ACCESS_READ) as mm:
                pos = mm.find(CENTRAL_DIRECTORY_SIGNATURE)

        if pos < 0:
            # Raise an error to make it fail fast
            raise BadZipfile('File is not a zip file')

        # size of 'ZIP end of central directory record'
        f.truncate(pos + 22)
        f.seek(0)
    except Exception:
        # Do not leak a handle this function opened itself; objects passed in
        # by the caller stay open because the caller owns them.
        if opened_here:
            f.close()
        raise
    return f
# repair_central_directory() returns the open file object; close it right
# away so the handle is not leaked when only the on-disk repair is wanted.
repair_central_directory('Title.zip').close()
---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
<ipython-input-13-23cfd0f65d89> in <module>
----> 1 repair_central_directory('Title.zip')
<ipython-input-12-64afa597a2dc> in repair_central_directory(zipFile)
39 else:
40 # Raise an error to make it fail fast
---> 41 raise BadZipfile('File is not a zip file')
BadZipFile: File is not a zip file
如有任何建议,不胜感激。