扫描目录和子目录中的文件

时间:2017-09-26 17:20:53

标签: python python-2.7 python-3.x

基本上我要做的是搜索文件夹结构中的文件以获取所提供的发票列表,并将所需数据复制到新文件中。我的下面的脚本按照描述工作,但脚本在包含子目录的搜索文件夹上窒息。我需要修改脚本来扫描根文件夹及其子目录文件。不知道该怎么做,我尝试了几种不同的代码更新,但似乎不起作用:

import tkinter
import os
import fnmatch
from tkinter import *
from tkinter import messagebox as tkMessageBox
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory
from pathlib import PureWindowsPath
from pathlib import Path

#filedialog  

content = ''
BrowsePath = ''
SearchPath = ''

top = tkinter.Tk()

#********************************************************FIELDS****************************************************************************
#Browse entry field
Browse1 = Label(text="Search List:").grid(row=0)

BrowsePath = StringVar()
BrowsePath.set("Select File Containing Invoice Numbers")
BrowseL = Label(bd=5,textvariable=BrowsePath, width=100,relief=SUNKEN).grid(row=0,column=1)



#Search Folder
Searce1 = Label( text="Search Folder:").grid(row=1)

SearchPath = StringVar()
SearchPath.set("Select Folder to Search")
SearchL = Label(bd=5,textvariable=SearchPath, width=100,relief=SUNKEN).grid(row=1,column=1)



#OutputFile
OutputL1 = Label( text="Output File:").grid(row=2)

OutputPath = StringVar()
OutputPath.set("File to Save Results to")
OutputL2 = Label(bd=5,textvariable=OutputPath, width=100,relief=SUNKEN).grid(row=2,column=1)


#********************************************************FUNCTIONS****************************************************************************

#Process complete function
def GetCallBack():
   tkMessageBox.showinfo( "Find Invoices", "Processing complete!")


#********************************************************FILE PICKERS****************************************************************************

    #Select file containing list of invoices
def GetFile():
    global content
    global BrowsePath
    filename = askopenfilename()
    infile = open(filename,'r')
    content = infile.read()
    BrowsePath.set(os.path.realpath(filename))
    return content

    #Select directory containing invoice files
def SearchDir():
    global content
    global SearchPath
    pathname = askdirectory()
    SearchPath.set(os.path.realpath(pathname))
    return content


    #Creates the save file with isolated invoices
def SaveFile():

    filename = os.path.abspath(os.path.join(SearchPath.get(),"Results.txt"))

    OutputPath.set(filename) #update label with location of file




#********************************************************READING invoice LIST FILE****************************************************************************


def  GetPOCount():
    PO = [line.rstrip('\n') for line in open(os.path.realpath(BrowsePath.get()))] #isolates list of invoices
    ponum_count = sum(1 for line in open(os.path.realpath(BrowsePath.get()))) #gets count of invoice numbers
    return PO, ponum_count #can be indexed


def GetFileNames():
    files = os.listdir(SearchPath.get()) #gets list of files
    return files #can be indexed

def GetFileLineCount():
    files = GetFileNames()
    file_count = len(fnmatch.filter(os.listdir(SearchPath.get()),'*.*'))
    line_count = sum(1 for line in open(os.path.realpath(os.path.join(SearchPath.get(),files[file_count-1])))) #gets count of lines in invoice file
    return line_count, file_count

def FindPOs():
    po_number = GetPOCount()[0]
    po_counter = GetPOCount()[1]

    print(po_number)
    print(po_counter)


    file_counter = GetFileLineCount()[1] 
    file_name = GetFileNames()

    print(file_name)
    print(file_counter)


    # For each file
    for filename in file_name:
        print("Searching " + filename)

        with open(os.path.join(SearchPath.get(),filename),'r') as content_file:
            line_count = sum(1 for line in content_file) #gets count of lines in invoice file
            print(line_count)
            po_line = [line.rstrip('\n') for line in open(os.path.realpath(os.path.join(SearchPath.get(),filename)))] #isolates each line
            result_filename = os.path.abspath(os.path.join(os.path.dirname(SearchPath.get()),"Results.txt"))
            OutputPath.set(result_filename)
            log = os.path.abspath(os.path.join(os.path.dirname(SearchPath.get()),"FoundInvoices.txt"))

            # For each line in file
            #TODO: make this for each po_line
            for PONum in po_number:
                print("looking for " + PONum)

                for line in range (0,line_count):

                    #locate Header Record
                    if po_line[line][16:18] == "10" or po_line[line][15:17] == "10":
                        print("On a header record")

                        if PONum in po_line[line].strip():
                            print("Looking for " + PONum)
                            # Write the current line to the results file
                            with open(result_filename,'a+') as file:
                                file.write(po_line[line] + '\n')

                            # Write this PONum to the log file
                            with open(log,'a+') as logs:
                                logs.write(PONum + '\n')

                            # Loop from the next line to the end
                            with open(result_filename,'a+') as file:
                                for z in range (line+1,line_count):
                                    if ((po_line[z][16:18] != "10") and (po_line[z] != '\n') and (po_line[z][15:17] != "10") and (po_line[z][16:18] != "05")):
                                        file.write(po_line[z] + '\n')
                                    else:
                                    # Once we've found a "10" or newline, stop printing this PO
                                        break


    GetCallBack()







#********************************************************BUTTONS****************************************************************************

# Search List Browse Button logic
BrowseButton = tkinter.Button(text ="Browse", command = GetFile).grid(row=0,column = 2)

# Search Directory Button logic
SearchButton = tkinter.Button(text ="Search", command = SearchDir).grid(row=1,column = 2)

# Find POs Button Logic
FindButton = tkinter.Button( text ="Get Invoices", command = FindPOs).grid(row=4,column = 1)


top.mainloop()

1 个答案:

答案 0 :(得分:1)

你的代码有点过于复杂。也许这只是总代码的一部分。例如,GetFileLineCount()返回两个变量,但其中一个变量从未在您的代码中使用过。 GetFileNames()可以产生相同的结果。

def GetFileNames():
    files = os.listdir(SearchPath.get()) #gets list of files
    file_count = len(fnmatch.filter(files),'*.*'))
    return files, file_count #can be indexed

或者更好的是你可以用os.walk()函数替换GetFileNames:

def GetFileNames():
    filepaths = []
    for root,dir,files in os.walk(SearchPath.get()):
        if len(files) > 0:
            for file in files:
                filepaths.append(os.path.join(root,file))
    return filepaths

这将为您提供SearchPath中所有文件的列表。然后,您可以使用相同的循环,但不必每次都使用您的文件名加入SearchPath:

for filename in filepaths:
        print("Searching " + filename)

        with open(filename,'r') as content_file:
            line_count = sum(1 for line in content_file)

......等等。

注意 - 我没有为您重写所有代码。您可能需要在此处进行一些修改才能使其工作,但这应该可以为您的问题提供解决方案。