我有两个压缩文件(abc.zip和xyz.zip),两个压缩文件的内容相同。即使它们下面的文件也具有相同的数据。我想比较zip内的文件大小和两个zip内的内容的大小。 如果它们相同,则将返回True,否则返回False。
基本上,我为两个zip文件都尝试了md5签名,但是哈希值一直在变化,可能是因为标头也需要时间戳。 其次,我尝试解压缩文件,然后逐个遍历所有文件并在其中创建md5哈希值并进行比较。
from Crypto.Hash import MD5
from zipfile import ZipFile
from pathlib import Path
from fnmatch import fnmatch
from pathlib import Path
import os
def get_MD5(file_path):
chunk_size = 8192
h = MD5.new()
with open(file_path, 'rb') as f:
while True:
chunk = f.read(chunk_size)
if len(chunk):
h.update(chunk)
else:
break
return h.hexdigest()
def validate_configs(generatedzippath: str , s3downloadedzippath: str) -> bool:
flag = False
#hashm = md5()
zip1 = ZipFile(Path(generatedzippath))
zip2 = ZipFile(Path(s3downloadedzippath))
zip1Map = {}
for entry in zip1.infolist() :
zip1Map[entry.filename] = entry
zip2Map = {}
for entry in zip2.infolist() :
zip2Map[entry.filename] = entry
# Check we have same list of files
zip1KeySet = set(zip1Map.keys())
zip2KeySet = set(zip2Map.keys())
if zip1KeySet != zip2KeySet :
return "Different list of entries" + zip1KeySet.symmetric_difference(zip1KeySet)
genzipfolder = str(zip1.filename).rstrip(".zip")
downloadedzipfolder = str(zip2.filename).rstrip(".zip")
# Loop on entries
for filename in zip1KeySet :
file_location = os.path.dirname(os.path.abspath(filename))
# Is it a folder => Then no diffs, its ocntents will be checked anyway
if filename.endswith('/') : continue
# Get each entry
entry1 = zip1Map[filename]
entry2 = zip2Map[filename]
fullgenfile = os.path.join(genzipfolder, entry1.filename)
downloadedfile = os.path.join(downloadedzipfolder, entry2.filename)
genfilepath = os.path.normpath(fullgenfile)
downlaodedfilepath = os.path.normpath(downloadedfile)
print(genfilepath)
print(downlaodedfilepath)
if entry1.filename == entry2.filename:
print("cool")
if entry1.filename == entry2.filename and entry1.file_size == entry2.file_size:
hashed1 = get_MD5(genfilepath)
hashed2 = get_MD5(downlaodedfilepath)
# print(hashed1, hashed2)
if hashed1 == hashed2:
print("True")
else:
print("False")
打开文件时出现问题,提示没有文件或目录原因未提取zip。 是否有其他方法可以使两个zip的md5签名相同(不包括时间戳)