递归解压缩归档,在字典中存储(文件名,提取内容)

时间:2012-06-05 17:59:50

标签: python serialization dictionary zip zipfile

请你帮我写一个函数返回:

dict("file1.txt": list(<contents of file1>),
     "file2.txt": list(<contents of file2>),
     "file3.txt": list(<contents of file3>),
     "file4.txt": list(<contents of file4>))

给定如下输入时:

    file.zip:
        outer\
        outer\inner1.zip:
                file1.txt
                file2.txt
        outer\inner2.zip:
                file3.txt
                file4.txt

我的尝试(运行时抛出的异常见下文):

2 个答案:

答案 0 :(得分:3)

最终解决了……这得益于问题"Extracting a zipfile to memory?"提供的一些帮助:

from zipfile import ZipFile, is_zipfile

def extract_zip(input_zip):
    """Read every member of a zip archive into memory.

    Args:
        input_zip: Anything ``ZipFile`` accepts -- a filesystem path or a
            seekable binary file-like object.

    Returns:
        A dict mapping each name from ``namelist()`` to that member's raw
        bytes. Directory entries are not filtered out.
    """
    # The context manager closes the archive even if a member fails to read;
    # the previous version left the handle open.
    with ZipFile(input_zip) as archive:
        return {name: archive.read(name) for name in archive.namelist()}

def extract_all(input_zip):
    """Extract the contents of every zip archive nested inside *input_zip*.

    Args:
        input_zip: Path or seekable binary file-like object of the outer
            archive.

    Returns:
        A dict mapping the name of each nested zip member to the dict
        produced by ``extract_zip`` for it, i.e.
        ``{inner_zip_name: {member_name: bytes}}``. Members of the outer
        archive that are not zip files are omitted.
    """
    # Local import: the surrounding snippet only imports names from zipfile.
    import io

    nested = {}
    with ZipFile(input_zip) as outer:
        for entry in outer.namelist():
            # The member lives inside the archive, not on disk, so it has to
            # be sniffed and opened from its bytes. Passing the bare member
            # name to is_zipfile()/ZipFile() would look for a file of that
            # name in the current working directory instead.
            payload = io.BytesIO(outer.read(entry))
            if is_zipfile(payload):
                payload.seek(0)
                nested[entry] = extract_zip(payload)
    return nested

答案 1 :(得分:1)

修改了你的代码(应该在删除之前先关闭 ZipFile,并添加了对内部 zip 文件的提取):

import os
import shutil
import tempfile
from zipfile import ZipFile

def unzip_recursively(parent_archive):
    """Collect the contents of every file nested inside *parent_archive*.

    Each member is read into memory. A member that is itself a zip archive
    (detected from its content, not its file name) is opened and walked the
    same way, to any depth; every other member is stored in the result.

    Args:
        parent_archive: Path or seekable binary file-like object of the
            archive to walk.

    Returns:
        A flat dict mapping member names to their raw bytes. A name is
        relative to the innermost archive that contains the file. If the
        same name occurs more than once, the entry visited last wins.

    Note:
        Nothing is written to disk, so member names are never used as
        filesystem paths. Nested archives are expanded without any size or
        depth limit -- do not feed this untrusted input without adding one.
    """
    # Local imports: the surrounding snippet only imports ZipFile by name.
    import io
    from zipfile import is_zipfile

    result = {}
    # The context manager guarantees the archive is closed on every path.
    with ZipFile(parent_archive) as archive:
        for name in archive.namelist():
            # Directory entries carry no data. Skip them by name rather than
            # assuming the first entry of the archive is a directory.
            if name.endswith('/'):
                continue
            data = archive.read(name)
            if is_zipfile(io.BytesIO(data)):
                result.update(unzip_recursively(io.BytesIO(data)))
            else:
                result[name] = data
    return result

# Script entry point: print the extracted contents of 'file.zip' from the
# current working directory.
if __name__ == '__main__':
    # Parenthesised call form: prints the same on Python 2 for a single
    # argument, and is valid syntax on Python 3 (the print statement is not).
    print(unzip_recursively('file.zip'))