Python os.walk 过滤文件名(排除指定扩展名并跳过指定目录)

时间:2014-07-30 21:32:55

标签: python python-2.7

# Glob patterns for the only file types that should be collected.
filetypes = ("*.jpg", "*.txt", "*.csv")
filelist = []

# Walk the whole C: drive; in each directory, append the full path of every
# file that matches a pattern, handling one pattern at a time.
for root, dirnames, filenames in os.walk("c:\\"):
    for pattern in filetypes:
        hits = fnmatch.filter(filenames, pattern)
        filelist.extend(os.path.join(root, name) for name in hits)

我有这段代码,它只会把带有我指定扩展名的文件添加到列表中。

1)我想反过来做:添加所有扩展名的文件("*.*"),然后过滤掉其中一些我不需要的扩展名,例如 "*.dat"、"*.dll"、"*.log"、"*.exe"。

2)另外,我不需要 c:\\windows、c:\\program files、c:\\else 这些目录中的文件,这些目录也可以过滤掉吗?

3)我需要它运行得快。下面这段来自其他答案的示例代码似乎更快,但这类函数的主要速度瓶颈是什么?是 os.walk 本身吗?如果是,GitHub 上的 scandir 项目提供了改进版的 os.walk,速度快 7-20 倍;还是说瓶颈在于按扩展名过滤文件的匹配过程?我想过滤 20 多个扩展名。

    import os

    # Lower-case suffixes to keep; str.endswith accepts the whole tuple at once.
    extns = ('.jpg', '.jpeg', '.png', '.tif', '.tiff')
    matches = []
    for root, dirnames, fns in os.walk("C:\\"):
        # Compare case-insensitively, but store the path with its original case.
        wanted = (fn for fn in fns if fn.lower().endswith(extns))
        matches.extend(os.path.join(root, fn) for fn in wanted)

非常感谢您的帮助

1 个答案:

答案 0 :(得分:0)

#!/usr/bin/python2.7

import os
import sys
import re
import fnmatch

def findit(root, exclude_files=(), exclude_dirs=()):
    """Return an iterator over the files under *root* that are not excluded.

    Args:
        root: Directory at which the walk starts.
        exclude_files: Iterable of fnmatch-style patterns (e.g. ``'*.dll'``)
            matched against the bare file name; matching files are skipped.
            When empty, no file is skipped by name.
        exclude_dirs: Iterable of directory paths. Each listed directory is
            skipped together with everything beneath it.

    Returns:
        A lazy iterator of ``os.path.join(dirpath, filename)`` strings.
        Patterns are translated and compiled when findit() is called, so an
        invalid pattern fails immediately rather than on first iteration.
    """
    def normalize(path):
        # One canonical form for both the exclusion list and walked paths.
        return os.path.normcase(os.path.normpath(path))

    # File names are normcased before matching, so the patterns must be too;
    # otherwise an upper-case pattern could never match on Windows.
    translated = [fnmatch.translate(os.path.normcase(p)) for p in exclude_files]
    if translated:
        is_excluded = re.compile(
            '|'.join('(?:%s)' % t for t in translated)).match
    else:
        # An empty alternation would match every name and exclude all files.
        is_excluded = None
    skipped_dirs = set(normalize(d) for d in exclude_dirs)

    def walk():
        for dirpath, dirnames, filenames in os.walk(root):
            if normalize(dirpath) in skipped_dirs:
                # Emptying dirnames in place stops os.walk from descending,
                # so the whole subtree is skipped, not just this directory.
                del dirnames[:]
                continue
            for name in filenames:
                if is_excluded is None or not is_excluded(os.path.normcase(name)):
                    yield os.path.join(dirpath, name)

    return walk()

if __name__ == '__main__':
    # Exclusion rules shared by both usage examples below.
    skip_files = ['*.dll', '*.dat', '*.log', '*.exe']
    skip_dirs = ['c:/windows', 'c:/program files', 'c:/else']

    # If you need the entire list in memory at once
    filelist = list(findit('c:/',
                           exclude_files=skip_files,
                           exclude_dirs=skip_dirs))

    # Or this, if you need the items one at a time (saves memory):
    for filename in findit('c:/',
                           exclude_files=skip_files,
                           exclude_dirs=skip_dirs):
        # The single-argument call form prints the same on Python 2 and 3.
        print(filename)  # or stat() or open() the file, or whatever.