如何使用python比较两个zip文件内容(带有某些签名)?

时间:2019-07-08 11:57:57

标签: python-3.x

我有两个压缩文件(abc.zip和xyz.zip),两个压缩文件的内容相同。即使它们下面的文件也具有相同的数据。我想比较zip内的文件大小和两个zip内的内容的大小。 如果它们相同,则将返回True,否则返回False。

基本上,我为两个zip文件都尝试了md5签名,但是哈希值一直在变化,可能是因为标头也需要时间戳。 其次,我尝试解压缩文件,然后逐个遍历所有文件并在其中创建md5哈希值并进行比较。

from Crypto.Hash import MD5
from zipfile import ZipFile
from pathlib import Path
from fnmatch import fnmatch
from pathlib import Path
import os
def get_MD5(file_path):

    chunk_size = 8192

    h = MD5.new()

    with open(file_path, 'rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if len(chunk):
                h.update(chunk)
            else:
                break

    return h.hexdigest()
def validate_configs(generatedzippath: str , s3downloadedzippath: str) -> bool:
    flag = False
    #hashm = md5()
    zip1 = ZipFile(Path(generatedzippath))
    zip2 = ZipFile(Path(s3downloadedzippath))
    zip1Map = {}
    for entry in zip1.infolist() :
        zip1Map[entry.filename] = entry
    zip2Map = {}
    for entry in zip2.infolist() :
        zip2Map[entry.filename] = entry

    # Check we have same list of files
    zip1KeySet = set(zip1Map.keys())
    zip2KeySet = set(zip2Map.keys())

    if zip1KeySet != zip2KeySet :
        return "Different list of entries" + zip1KeySet.symmetric_difference(zip1KeySet)
    genzipfolder = str(zip1.filename).rstrip(".zip")
    downloadedzipfolder = str(zip2.filename).rstrip(".zip")
    # Loop on entries
    for filename in zip1KeySet :
        file_location = os.path.dirname(os.path.abspath(filename)) 



        # Is it a folder => Then no diffs, its ocntents will be checked anyway
        if filename.endswith('/') : continue

        # Get each entry
        entry1 = zip1Map[filename]
        entry2 = zip2Map[filename]
        fullgenfile = os.path.join(genzipfolder, entry1.filename)
        downloadedfile = os.path.join(downloadedzipfolder, entry2.filename)
        genfilepath = os.path.normpath(fullgenfile)
        downlaodedfilepath = os.path.normpath(downloadedfile)
        print(genfilepath)
        print(downlaodedfilepath)
        if entry1.filename == entry2.filename:
            print("cool")
        if entry1.filename == entry2.filename and entry1.file_size == entry2.file_size:
            hashed1 = get_MD5(genfilepath)
            hashed2 = get_MD5(downlaodedfilepath)
        #    print(hashed1, hashed2)
            if hashed1 == hashed2:
                print("True")
            else:
                print("False")

打开文件时出现问题,提示没有文件或目录原因未提取z​​ip。 是否有其他方法可以使两个zip的md5签名相同(不包括时间戳)

0 个答案:

没有答案