在 Python 中递归遍历目录并按文件类型记录文件

时间:2013-01-31 04:01:08

标签: python directory-structure

我想调查一组目录并获取以下信息

  1. 按文件类型列出的文件数
  2. 按文件类型列出的文件列表

  3. 对每个子目录分别给出第 1、2 项的结果

我有以下代码。生成扩展名列表(extList)的部分工作正常,但我卡在如何为每个扩展名分别创建一个列表和一个计数器上,因为事先并不知道会有哪些扩展名、有多少个。也不确定此后还会出现什么其他问题。

    import os, sys, datetime
    
    top = os.getcwd() # change to a specific path if required.
    RootOutput = top
    SourceDIR = top
    outDIR = top+"\\workingFiles" # directory where output is written to. Includes temp files
    # END setting base paths
    # NOTHING BELOW should need editing.
    List =[]
    extList=[]
    
    os.chdir(top)
    
    # Walk the tree bottom-up, collecting every file path in List and every
    # distinct extension in extList.
    for root, dirs, files in os.walk(SourceDIR, topdown=False):
        for fl in files:
          currentFile=os.path.join(root, fl)
          # Slice from the last '.' to the end, so the extension keeps its
          # leading dot. If the name has no '.', rfind returns -1 and the
          # slice is just the last character of the name.
          ext=fl[fl.rfind('.'):]
          if ext not in extList:
            extList.append(ext)
          List.append(currentFile)
    
    print extList
    
    # NOTE: the two assignments below are not valid Python. The left-hand side
    # of `=` is an expression (a string concatenation), not a name, so the
    # file fails with a SyntaxError before anything runs. The intent is one
    # counter and one file list per extension.
    for ext in extList:
        ext+"Counter"=0
        ext+"FileList"=[]
    
    for fl in List:
        # Here the extension is taken from the full path, not the file name.
        ext=fl[fl.rfind('.'):]
        # NOTE: also a SyntaxError - augmented assignment to an expression.
        ext+"Counter"+=1
        # NOTE: this parses, but it calls .append on the string literal
        # "FileList" rather than on a per-extension list.
        ext+"FileList".append(fl)
    
    for ext in extList:
        print ext
        # These print the concatenated strings themselves (e.g. ".txtCounter"),
        # not the value of a variable with that name.
        print ext+"Counter"
        print ext+"FileList"
    

已根据答案更新了代码。现在的问题出在写 txt 日志文件这一步:它只创建了一个文本文件。

    # Report each extension and append its file list to a per-extension log
    # named "<extension>_Log.txt" in the current working directory.
    for elem in ext_dict:
        entry = ext_dict[elem]
        print(elem)
        print(entry["Counter"])
        print(entry["FileList"])
        # `with` closes the log even if a write fails part-way through.
        with open(elem+'_Log.txt', 'a') as log:
            # One "index,path" line per file, numbered from 1.
            for Num, fl in enumerate(entry["FileList"], 1):
                log.write(str(Num)+","+str(fl)+"\n")
    

最终脚本如下,任何人都可以使用。

    #-------------------------------------------------------------------------------
    # Name:    File_Review
    # Purpose: Review of all files in directory/subdirectories with report on file type and size
    #
    # Author:      georgec
    #
    # Created:     25/01/2013
    # Copyright:   (c) ATGIS 2013
    # Licence:     Creative Commons 3.0 - BY
    #-------------------------------------------------------------------------------
    
    import os, sys, datetime
    
    top = os.getcwd() # change to a specific path if required.
    # NOTE(review): RootOutput is not referenced again in the visible script.
    RootOutput = top
    SourceDIR = top
    # Overrides the assignment above: this hard-coded path is the tree that
    # actually gets scanned.
    SourceDIR = r'P:\2013'
    outDIR = top # directory where output is written to. Includes temp files
    # NOTE(review): finalDIR is not referenced again in the visible script.
    finalDIR = top+"\\final" # folder for final data only
    # Each entry is passed to InvestigateFiles as DirLimiter, which keeps only
    # paths containing that substring; '' is contained in every path.
    DirLimiterList=['']
    
    # END setting base paths
    # NOTHING BELOW should need editing.
    
    # Log files are opened with relative names, so they land in this directory.
    os.chdir(top)
    
    def InvestigateFiles(SourceDIR,outDIR,DirLimiter):
        """Group the files under SourceDIR by extension, print and log them.

        The extension is the part of the file NAME after its last '.', without
        the dot. A name with no '.' is used whole as its own "extension".

        A file is counted only if its extension is non-empty and DirLimiter
        occurs somewhere in its full path. Every extension seen still gets an
        entry and a log file, possibly empty.

        For each extension, the extension, its file count and its file list
        are printed. One "index;path;size_in_bytes" line per file is then
        appended to "<ext>_<tag>_Log.txt" in the current working directory.
        <tag> is the text between the first and last backslash of DirLimiter.

        Parameters:
            SourceDIR:  root directory to walk recursively.
            outDIR:     accepted for interface compatibility but not used;
                        logs go to the current working directory.
            DirLimiter: substring a path must contain to be included; ''
                        matches every path.

        Raises:
            OSError / IOError if a listed file's size cannot be read or a log
            file cannot be opened or written.
        """
        extList = []   # extensions in first-seen order (printed below)
        ext_dict = {}  # extension -> {"Counter": int, "FileList": [paths]}

        for root, dirs, files in os.walk(SourceDIR, topdown=False):
            for fl in files:
                currentFile = os.path.join(root, fl)
                # Take the extension from the file name only. Re-deriving it
                # later from the full path would pick up a '.' in a directory
                # name and mis-key or drop files whose own name has no dot.
                ext = fl[fl.rfind('.')+1:]
                if ext not in ext_dict:
                    extList.append(ext)
                    ext_dict[ext] = {"Counter": 0, "FileList": []}
                if ext != '' and DirLimiter in currentFile:
                    ext_dict[ext]["Counter"] += 1
                    ext_dict[ext]["FileList"].append(currentFile)

        print(extList)

        # Text between the first and last backslash of DirLimiter; it is ''
        # when DirLimiter is ''.
        limiterTag = DirLimiter[DirLimiter.find('\\')+1:DirLimiter.rfind('\\')]

        for elem in extList:
            entry = ext_dict[elem]
            print(elem)
            print(entry["Counter"])
            print(entry["FileList"])
            # Append mode: repeated runs accumulate in the same log. `with`
            # closes the file even if getsize() or a write raises.
            with open(elem+'_'+limiterTag+'_Log.txt', 'a') as log:
                for Num, fl in enumerate(entry["FileList"], 1):
                    log.write(str(Num)+";"+str(fl)+";"+str(os.path.getsize(fl))+"\n")
    
    # Run one full scan-and-log pass for each configured path filter.
    for DirLimiter in DirLimiterList:
     InvestigateFiles(SourceDIR,outDIR,DirLimiter)
    

1 个答案:

答案 0 :(得分:1)

你应该使用字典来存储数据

# One record per extension: how many files were seen and which ones.
ext_dict = {}

# Create the dictionary
for ext in extList:
    ext_dict[ext] = {"Counter": 0, "FileList": []}

# Populate the dictionary
for fl in List:
    # Extension = last '.' to the end of the string (leading dot included).
    ext = fl[fl.rfind('.'):]
    # `in` replaces the deprecated dict.has_key(); entries whose extension is
    # not a known key are skipped.
    if ext in ext_dict:
        ext_dict[ext]["Counter"] += 1
        ext_dict[ext]["FileList"].append(fl)

# Iterate over dictionary keys
for elem in ext_dict:
    print(elem)
    # The key is "Counter" (capital C), matching how the records are created.
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])