在Python中复制zip文件的内容

时间:2014-07-18 12:18:11

标签: python python-2.7

我希望以递归方式搜索一个网络目录,找出所有zip文件中包含的.xls文件。对于在zip文件中找到的每个XLS文件,我想将其复制到C:盘上的一个本地位置。到目前为止,这是我的脚本:

import os
import zipfile
import fnmatch
import shutil

# Script from the question: walk rootPath, collect every .zip file, and try
# to copy each .xls member of those archives into destPath.
# NOTE(review): these are non-raw strings containing backslashes. They only
# keep their backslashes because \D, \C and \T are not escape sequences;
# raw strings (r"...") or doubled backslashes would be safer.
rootPath = "L:\Data\Cases"
destPath = "C:\Test"
allFileList = []
zipList = []

# Create a list containing the full path of every file found under rootPath.
for dirname, dirnames, filenames in os.walk(rootPath):
    for filename in filenames:
        allFileList.append(os.path.join(dirname, filename))

# Keep only the paths that end in ".zip".
for file in allFileList:
    if file.endswith(".zip"):
        zipList.append(file)

for file in zipList:
    with zipfile.ZipFile(file) as zip_file:
        for member in zip_file.namelist():
            if member.endswith(".xls"):
                # Drop any directory part stored inside the archive so the
                # target is a flat file name directly under destPath.
                filename = os.path.basename(member)
                if not filename:
                    # An empty basename means the entry is a directory.
                    continue
                # zip_file.open() returns a file-like object, not a path.
                source = zip_file.open(member)
                target = os.path.join(destPath, filename)
                # NOTE: shutil.copy2 is handed the file-like object here
                # where it expects a path; this is the call that appears in
                # the reported traceback.
                shutil.copy2(source, target)

错误信息如下。我认为错误出在把压缩包内的文件实际复制到目标路径这一步。

Traceback (most recent call last):
  File "C:/Users/user/Desktop/parsecsv.py", line 30, in <module>
    shutil.copy2(source, target)
  File "C:\Program Files\Python278\lib\shutil.py", line 130, in copy2
    copyfile(src, dst)
  File "C:\Program Files\Python278\lib\shutil.py", line 68, in copyfile
    if _samefile(src, dst):
  File "C:\Program Files\Python278\lib\shutil.py", line 63, in _samefile
    return (os.path.normcase(os.path.abspath(src)) ==
  File "C:\Program Files\Python278\lib\ntpath.py", line 487, in abspath
    path = _getfullpathname(path)

有什么建议吗?

2 个答案:

答案 0 :(得分:2)

ZipFile.open()不会返回文件系统路径,而是返回类似文件的ZipExtFile对象。您想要的是ZipFile.extract()(然后您根本不需要shutil.copy()):

# NB: untested code, refer to the docs for more info.
# For every archive listed in zipList, extract its .xls members into destPath
# (zipList and destPath come from the surrounding script).
for archive_path in zipList:
    with zipfile.ZipFile(archive_path) as archive:
        xls_members = [
            name for name in archive.namelist() if name.endswith(".xls")
        ]
        for name in xls_members:
            archive.extract(name, destPath)

另外顺便一提,您不需要先构建所有文件的列表,再构建一个zip文件列表,然后再遍历这个列表——您完全可以一次遍历就完成整个过程:

# Single pass: open each .zip as soon as the walk finds it, instead of first
# building intermediate lists. rootPath and destPath are expected to be
# defined by the surrounding script.
for dirname, dirnames, filenames in os.walk(rootPath):
    for filename in filenames:
        if not filename.endswith(".zip"):
            continue
        fullpath = os.path.join(dirname, filename)
        with zipfile.ZipFile(fullpath) as zip_file:
            for member in zip_file.namelist():
                if member.endswith(".xls"):
                    # extract() writes the member under destPath, keeping
                    # any directory part stored in the archive.
                    zip_file.extract(member, destPath)

答案 1 :(得分:2)

正如bruno所说,我认为你无法直接检查zipfile的内容;不过我认为更清晰的做法是先提取,再删除不需要的文件,例如使用shutil.rmtree删除其余内容。

def main(rootPath="C:\\rootpath", destPath="C:\\Test"):
    """Extract every .xls member of every .zip under rootPath into destPath.

    After extraction, every file below destPath whose name does not end in
    ".xls" is deleted, so destPath ends up holding only .xls files.

    Args:
        rootPath: Directory that is searched recursively for .zip files.
        destPath: Directory that receives the extracted .xls files. Any
            other file already present below it is removed.
    """
    # Collect the archive paths before extracting anything, so the walk is
    # finished before any new files are written.
    zipList = [
        os.path.join(dirname, filename)
        for dirname, dirnames, filenames in os.walk(rootPath)
        for filename in filenames
        if filename.endswith(".zip")
    ]

    for file in zipList:
        with zipfile.ZipFile(file) as zip_file:
            for member in zip_file.namelist():
                if member.endswith(".xls"):
                    zip_file.extract(member, destPath)

    # Clean up everything in destPath that is not an .xls file. os.walk
    # yields bare file names, so they must be joined with dirname, and
    # os.remove is used because the entries are files, not directory trees.
    for dirname, dirnames, filenames in os.walk(destPath):
        for filename in filenames:
            if not filename.endswith(".xls"):
                os.remove(os.path.join(dirname, filename))


if __name__ == '__main__':
    main()