我有一个存放日志文件的目录。我想使用 Python 脚本处理此目录中的每个文件。
for file in directory:
# do something
我该怎么做?
答案 0 :(得分:21)
使用 os.listdir() 或 os.walk(),具体取决于您是否要以递归方式处理。
答案 1 :(得分:6)
在Python 2中,您可以尝试类似:
import os.path
def print_it(x, dir_name, files):
    """Print a directory path followed by the list of names found in it.

    Intended as the ``visit`` callback of Python 2's ``os.path.walk``, which
    invokes it once per directory.

    :param x: the extra value handed to ``os.path.walk`` as its third
        argument; it is accepted but not used here
    :param dir_name: path of the directory being visited
    :param files: names of the entries inside ``dir_name``
    :return: None
    """
    # Parenthesised single-argument print produces the same output on
    # Python 2 (print statement) and Python 3 (print function).
    print(dir_name)
    print(files)
# Python 2 only (os.path.walk does not exist in Python 3): walk the tree rooted
# at your_dir; the third argument (0 here) is handed to print_it as its first
# argument.
os.path.walk(your_dir, print_it, 0)
注意:os.path.walk 的第三个参数可以是任意值,它会作为第一个参数传递给回调函数。
在 Python 3 中,os.path.walk 已被删除;请改用 os.walk。您只需将目录传递给它,无需提供回调函数,它会依次产生 (dirpath, dirnames, filenames) 三元组。因此,与上面代码大致等价的写法是:
import os
# os.walk yields one (dirpath, dirnames, filenames) triple per directory in the
# tree rooted at your_dir; no callback is needed.
for dirpath, dirnames, filenames in os.walk(your_dir):
    # print() calls, because this is the Python 3 variant of the example and
    # the print statement is a syntax error there.
    print(dirpath)
    print(dirnames)
    print(filenames)
答案 2 :(得分:3)
import os
# Location of the directory you want to scan.
loc = '/home/sahil/Documents'

# Module-level dictionary that accumulates the results of every scan() call:
# it maps each directory path found to the list of entry names inside it.
k1 = {}


def scan(element, loc):
    """Recursively record the contents of every directory named in ``element``.

    Each name in ``element`` is joined onto ``loc``. If the resulting path can
    be listed with ``os.listdir`` it is treated as a directory: its contents
    are printed, stored in the module-level ``k1`` dictionary under that path,
    and then scanned in turn. Names that cannot be listed (plain files,
    unreadable directories, ...) are skipped.

    :param element: iterable of entry names located directly inside ``loc``
    :param loc: path of the directory that contains those entries
    :return: the shared ``k1`` dictionary (directory path -> list of names)
    """
    for name in element:
        path = os.path.join(loc, name)
        try:
            children = os.listdir(path)
        except OSError:
            # Listing failed, so this is not a readable directory; deliberately
            # ignore it and carry on with the remaining names.
            continue
        print(".....")
        print("Directory %s " % path)
        print(" ")
        print(children)
        k1[path] = children
        scan(children, path)
    return k1  # the same dictionary object on every call
# Initial steps: list the top-level directory, then scan everything below it.
try:
    initial_list = os.listdir(loc)
except OSError:
    print("error")
    # Fall back to an empty listing so the scan() call below still has a
    # defined argument instead of failing with NameError.
    initial_list = []
else:
    print(initial_list)
k = scan(initial_list, loc)
print(" ...................................................................................")
print(k)
我将此代码作为目录扫描程序,为我的音频播放器制作播放列表功能,并以递归方式扫描目录中的所有子目录。
答案 3 :(得分:3)
您可以像这样以递归方式列出目录中的每个文件。
from os import listdir
from os.path import isfile, join, isdir
def getAllFilesRecursive(root):
    """Return the paths of all regular files below ``root``, recursively.

    Files directly inside ``root`` come first, followed by the files of each
    sub-directory. Every returned path starts with ``root``, so it is absolute
    when ``root`` is absolute and relative to the current working directory
    otherwise.

    :param root: directory to explore
    :return: list of file paths
    """
    files = []
    subdirs = []
    # One listing pass instead of two; each entry is classified once.
    for name in listdir(root):
        path = join(root, name)
        if isfile(path):
            files.append(path)
        elif isdir(path):
            subdirs.append(path)
    for subdir in subdirs:
        # The recursive call already returns paths prefixed with root, so they
        # are added as-is. Joining them with root again (as the previous
        # version did) duplicated the prefix for relative roots.
        files.extend(getAllFilesRecursive(subdir))
    return files
答案 4 :(得分:2)
您可以尝试 glob:
import glob
# glob.glob returns the paths that match the shell-style pattern; a relative
# pattern such as this one is resolved against the current working directory.
for file in glob.glob('log-*-*.txt'):
    # Etc. (placeholder left by the author: process `file` here)
但是 glob 默认不会递归(据我所知),所以如果你的日志位于该目录的子文件夹中,最好还是参考 Ignacio Vazquez-Abrams 发布的答案。(补充:自 Python 3.5 起,可以使用 glob.glob('**/log-*-*.txt', recursive=True) 进行递归匹配。)
答案 5 :(得分:2)
import os
# Directory the walk starts from.
rootDir = '.'
for current_dir, child_dirs, file_names in os.walk(rootDir):
    print('Found directory: %s' % current_dir)
    for entry in file_names:
        print('\t%s' % entry)
    # Drop the first sub-directory, when there is one. The list is modified
    # in place, which is how os.walk is told not to descend into that entry.
    if child_dirs:
        child_dirs.pop(0)
答案 6 :(得分:1)
如果您需要检查多种文件类型,请使用
glob.glob("*.jpg") + glob.glob("*.png")
Glob不关心列表中文件的顺序。如果您需要按文件名排序的文件,请使用
sorted(glob.glob("*.jpg"))
答案 7 :(得分:0)
这是我基于 Matheus Araujo 的答案编写的递归文件遍历器版本,它可以接受可选的排除列表参数,这在复制目录树时需要排除某些目录/文件/文件扩展名的情况下非常有用。
import os
def get_files_recursive(root, d_exclude_list=(), f_exclude_list=(), ext_exclude_list=(), primary_root=None):
    """
    Walk a path to recursively find files

    Modified version of https://stackoverflow.com/a/24771959/2635443 that includes exclusion lists.
    Every returned path starts with ``root``: absolute when ``root`` is absolute,
    relative to the current working directory otherwise.

    :param root: path to explore
    :param d_exclude_list: list of root relative directories paths to exclude
    :param f_exclude_list: list of filenames without paths to exclude
    :param ext_exclude_list: list of file extensions to exclude, ex: ['.log', '.bak']
    :param primary_root: Only used for internal recursive exclusion lookup, don't pass an argument here
    :return: list of files found in path
    """
    # Make sure we use a valid os separator for exclusion lists. This is
    # repeated at every recursion level, which is harmless because normpath
    # is idempotent.
    d_exclude_list = [os.path.normpath(d) for d in d_exclude_list]

    files = []
    subdirs = []
    # Single listing pass: keep wanted files, remember sub-directories.
    for name in os.listdir(root):
        path = os.path.join(root, name)
        if os.path.isfile(path):
            if name not in f_exclude_list and os.path.splitext(name)[1] not in ext_exclude_list:
                files.append(path)
        elif os.path.isdir(path):
            subdirs.append(name)

    for name in subdirs:
        # p_root is the directory's path relative to the root of the very
        # first call; that is what d_exclude_list entries are compared with.
        p_root = name if primary_root is None else os.path.join(primary_root, name)
        if p_root in d_exclude_list:
            continue
        # The recursive call already returns paths prefixed with root, so they
        # are added unchanged. Re-joining them with root (as before)
        # duplicated the prefix whenever root was a relative path.
        files.extend(get_files_recursive(os.path.join(root, name), d_exclude_list, f_exclude_list,
                                         ext_exclude_list, primary_root=p_root))
    return files
答案 8 :(得分:0)
这是我上一个版本的更新,新版本的排除列表支持 glob 风格的通配符。该函数会进入给定路径的每个子目录,并以相对路径的形式返回这些目录中所有文件的列表。函数的工作方式类似于 Matheus 的答案,并且可以使用可选的排除列表。
例如:
files = get_files_recursive('/some/path')
files = get_files_recursive('/some/path', f_exclude_list=['.cache', '*.bak'])
files = get_files_recursive('C:\\Users', d_exclude_list=['AppData', 'Temp'])
files = get_files_recursive('/some/path', ext_exclude_list=['.log', '.db'])
希望这可以帮助某人,例如该主题的最初答案对我有所帮助:)
import os
from fnmatch import fnmatch
def glob_path_match(path, pattern_list):
    """
    Checks if path matches any entry in a list of glob style wildcard patterns

    Every pattern is tested against the last component of ``path``. A pattern
    that itself contains a path separator (for example ``'build/tmp'``) is
    also tested against the whole normalized path, because it could never
    match a single path component.

    :param path: path of file / directory
    :param pattern_list: list of wildcard patterns to check for
    :return: Boolean
    """
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    name = os.path.basename(path)
    full_path = os.path.normpath(path)
    for pattern in pattern_list:
        if fnmatch(name, pattern):
            return True
        # Only multi-component patterns are tried against the full path, so
        # plain patterns keep matching on the basename exactly as before.
        if any(sep in pattern for sep in separators) and fnmatch(full_path, pattern):
            return True
    return False
def get_files_recursive(root, d_exclude_list=(), f_exclude_list=(), ext_exclude_list=(), primary_root=None):
    """
    Walk a path to recursively find files

    Modified version of https://stackoverflow.com/a/24771959/2635443 that includes exclusion lists
    and accepts glob style wildcards on files and directories.
    Every returned path starts with ``root``: absolute when ``root`` is absolute,
    relative to the current working directory otherwise.

    :param root: path to explore
    :param d_exclude_list: list of glob style patterns; each directory's root relative path is
        checked against them with glob_path_match
    :param f_exclude_list: list of glob style filename patterns (without paths) to exclude
    :param ext_exclude_list: list of file extensions to exclude, ex: ['.log', '.bak']
    :param primary_root: Only used for internal recursive exclusion lookup, don't pass an argument here
    :return: list of files found in path
    """
    # Make sure we use a valid os separator for exclusion lists. This is
    # repeated at every recursion level, which is harmless because normpath
    # is idempotent.
    d_exclude_list = [os.path.normpath(d) for d in d_exclude_list]

    files = []
    subdirs = []
    # Single listing pass: keep wanted files, remember sub-directories.
    for name in os.listdir(root):
        path = os.path.join(root, name)
        if os.path.isfile(path):
            if not glob_path_match(name, f_exclude_list) and os.path.splitext(name)[1] not in ext_exclude_list:
                files.append(path)
        elif os.path.isdir(path):
            subdirs.append(name)

    for name in subdirs:
        # p_root is the directory's path relative to the root of the very
        # first call; it is what the directory exclusion patterns are tested on.
        p_root = name if primary_root is None else os.path.join(primary_root, name)
        if glob_path_match(p_root, d_exclude_list):
            continue
        # The recursive call already returns paths prefixed with root, so they
        # are added unchanged. Re-joining them with root (as before)
        # duplicated the prefix whenever root was a relative path.
        files.extend(get_files_recursive(os.path.join(root, name), d_exclude_list, f_exclude_list,
                                         ext_exclude_list, primary_root=p_root))
    return files