如何提取zip文件夹并查找特定的文件名Python 2

时间:2019-07-19 22:03:44

标签: python python-2.7

我的代码目前会解压一个 zip 文件,并遍历其中的每个文件。但是,我只需要读取该 zip 文件中名为“file.txt”的那个文件。我尝试只从 zip 文件中读取“file.txt”,结果却处理了其中的所有文件。有什么建议吗?我的代码会把结果输出到一个文本文件;我只需要读取“file.txt”,但它却读取了所有以“.txt”结尾的文件,如下面的实际结果所示。

    import re
    import os
    from zipfile import ZipFile
    def pan():
        """Scan input_files.zip, then report on every file under ./input_files.

        The report header is printed and written to output2.txt, and
        output_contents() appends one row per line of each file it is given.

        NOTE(review): the zip loop below does not select anything. The bytes
        returned by ZipFile.read() are discarded, and its second positional
        argument is the password (pwd), not a member-name filter. The report
        loop then walks os.listdir() of the input_files directory, so every
        file found there is analysed, not only file.txt.
        """
        print("Extract single file from ZIP")
        with ZipFile('input_files.zip', 'r') as zipObj:
            listOfFileNames = zipObj.namelist()
            #zipObj.extractall()
            for fileName in listOfFileNames:
                if fileName.endswith('.txt'):
                    # Result is thrown away; 'file.txt' lands in the pwd parameter.
                    zipObj.read(fileName, 'file.txt') 
    ##def main():
        # comma_string, wp_string, tab_string, mc_string and ne_string are
        # assigned here but not referenced anywhere else in this function.
        comma_string = ', '
        outfile = "output2.txt"                 # name of the report file this function writes
        wp_string = " White Space Detected"          
        tab_string = " tab detected"
        mc_string = " Missing carriage return"
        ne_string = " No Error"                 # intended label for lines without whitespace or tabs
        baconFile = open(outfile,"wt") 
        print('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n')         # column header, echoed to the console
        baconFile.write('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n') # same column header, written to the report file

        # Every directory entry is opened and reported; there is no filter on the file name.
        for filename in os.listdir(os.getcwd() + "/input_files"):
            with open("input_files/" + filename, 'r') as f:
                output_contents(filename, f, baconFile)
        baconFile.close()       # close the report file once all input files are processed


    def output_contents(filename, f, baconFile):
        """Print and write one report row for each line yielded by *f*.

        filename  -- label placed at the start of every row
        f         -- iterable of text lines (an open file or a list of strings)
        baconFile -- object with a write() method that receives the rows

        Each row holds: the 1-based line number, the number of comma-separated
        fields, the digit count after the decimal point for every number found
        (0 when a number has no fractional part), whether a space was seen,
        whether a tab was seen, and whether the line contains a newline.
        A line without a newline is reported as "False (end of file)".
        """
        row_layout = "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"

        for line_number, text in enumerate(f, 1):
            field_count = len(text.split(','))
            has_space = " " in text
            has_tab = "\t" in text
            newline_state = True if '\n' in text else "False (end of file)"

            # findall() returns only the captured group: the digits after the
            # optional decimal point, or '' when that group did not take part.
            decimals = ','.join(
                str(len(fraction))
                for fraction in re.findall(r'\d+\.?(\d+)?', text)
            )

            # The filename is prepended after formatting, so braces in the
            # filename are never interpreted as format fields.
            row = filename + row_layout.format(
                line_number,
                field_count,
                decimals,
                str(has_space),
                str(has_tab),
                str(newline_state),
            )

            print(row)
            # Rows are separated by a leading newline, not a trailing one.
            baconFile.write('\n')
            baconFile.write(row)



    if __name__ == '__main__':
        # Run the report only when this file is executed as a script.
        # main() is disabled; pan() is the sole entry point.
        #main()
        pan()

预期:

Filename    Line    number of numbers   string separated by a comma white space found   tab found   carriage return found

file.txt    1       3                   0,3,4                       False               False       True                 
file.txt    2       3                   0,0,1                       True                True        True                 
file.txt    3       3                   7,3,0,1                     False               False       True                 
file.txt    4       1                   1                           False               False       True                 
file.txt    5       3                   5,0,1                       False               False       False (end of file)  

实际:

Filename    Line    number of numbers   string separated by a comma white space found   tab found   carriage return found

alot.txt    1       3                   0,3,4                       False               False       True                 
alot.txt    2       3                   0,0,1                       True                True        True                 
alot.txt    3       3                   7,3,0,1                     False               False       True                 
alot.txt    4       1                   1                           False               False       True                 
alot.txt    5       3                   5,0,1                       False               False       False (end of file)  
file.txt    1       3                   0,3,4                       False               False       True                 
file.txt    2       3                   0,0,1                       True                True        True                 
file.txt    3       3                   5,0,1                       False               False       False (end of file)  
file_2.txt  1       3                   0,3,4                       False               False       True                 
file_2.txt  2       3                   0,0,1                       True                True        True                 
file_2.txt  3       3                   5,0,1                       False               False       False (end of file)  
INCOME.txt  1       3                   0,3,4                       False               False       True                 
INCOME.txt  2       3                   0,0,1                       True                True        True                 
INCOME.txt  3       3                   5,0,1                       False               False       True                 
INCOME.txt  4       3                   4,3,0                       True                True        True                 
INCOME.txt  5       3                   7,3,0,1                     False               False       True                 
INCOME.txt  6       1                   1                           False               False       False (end of file)  

1 个答案:

答案 0 :(得分:0)

您并没有提取任何文件或文件夹,因为您的 pan 函数写得不正确。[Python 3.Docs]: zipfile - ZipFile.read(name, pwd=None) 接受两个参数(成员名和可选的密码),而您传入的第二个参数并不是密码。

此版本的 pan 会从 .zip 存档中提取所有名为 file.txt 的文件(无论位于存档中的哪个目录),并将其保存到当前目录下(保留其在存档中的路径):

def pan(archive_name="input_files.zip", member_base_name="file.txt"):
    """Extract every archive member whose base name equals *member_base_name*.

    archive_name     -- path of the zip archive to open
    member_base_name -- file name to look for, compared against the part of
                        each member name after its last "/"

    Matching members are extracted with ZipFile.extract() using its default
    destination, and each one is announced on the console first.
    """
    print("Extract files by name from ZIP")
    with ZipFile(archive_name, "r") as archive:
        for member in archive.namelist():
            # Only the text after the last "/" takes part in the comparison.
            if member.rpartition("/")[2] != member_base_name:
                continue
            print("Handling: {0:s}".format(member))
            archive.extract(member)

@ EDIT0

获得更多详细信息后,这是 pan 函数的更新版本(我删除了所有未使用的变量):

def pan():
    """Report on ``file.txt`` from ``input_files.zip`` and save it to ``output2.txt``.

    The member is read straight from the archive (nothing is extracted to
    disk), split into lines, and handed to output_contents() together with
    the open report file. The column heading is printed and written first.
    If the archive has no member named exactly ``file.txt``, only the
    heading is produced.
    """
    print("Extract single file from ZIP")
    archive_name = "input_files.zip"
    file_name = "file.txt"
    lines = []
    with ZipFile(archive_name, "r") as zip_file:
        if file_name in zip_file.namelist():
            text = zip_file.read(file_name).decode()
            # Keep the "\n" on every line, the way iterating an open file
            # does: output_contents() looks for "\n" in each line to fill
            # the "carriage return found" column. A plain split("\n") drops
            # the terminators and adds an empty final line when the text
            # ends with a newline.
            pieces = text.split("\n")
            lines = [piece + "\n" for piece in pieces[:-1]]
            if pieces[-1]:
                lines.append(pieces[-1])

    bacon_file_name = "output2.txt"
    heading = "Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n"
    print(heading)
    with open(bacon_file_name, "wt") as bacon_file:
        bacon_file.write(heading)
        output_contents(file_name, lines, bacon_file)