我正在尝试在目录结构中找到最大的文件,以便我可以使用该信息来帮助创建数据库。
以下是代码:
import os
import datetime
def get_files(target):
    """Collect the size of every file under *target*, largest first.

    The tree is walked with ``os.walk``; sub-directories that are mount
    points are pruned so the walk does not descend into them.

    Args:
        target: Path of the directory to scan.

    Returns:
        A list of ``(filename, size_in_bytes)`` tuples sorted by size in
        descending order, so the largest file is the first element. The
        list is empty when *target* contains no readable files (or does
        not exist, because ``os.walk`` ignores listing errors by default).
    """
    filelist = []
    for root, dirs, files in os.walk(target):
        # Prune in place: os.walk only skips directories that are removed
        # from this very list object.
        dirs[:] = [
            d for d in dirs
            if not os.path.ismount(os.path.join(root, d))
        ]
        for name in files:
            filename = os.path.join(root, name)
            try:
                size = os.path.getsize(filename)
            except OSError:
                # Broken symlink, or the file vanished / became unreadable
                # after it was listed: skip it instead of aborting the walk.
                continue
            filelist.append((filename, size))
    # Sort on the size field only; sorting the raw tuples would order by path.
    filelist.sort(key=lambda entry: entry[1], reverse=True)
    return filelist


files_by_size = get_files("/home/dave/TL/case")
if files_by_size:
    largest_name, largest_size = files_by_size[0]
    print(largest_name, largest_size)
如您所见,我是新手,不知道如何将函数的结果传递给文件。
我的最终目标是找到最大的文件及其大小。它可以转到文件或stdout。
我错过了什么?
答案 0 :(得分:2)
只需将您的函数改为生成器函数,然后以itemgetter(1)(即文件大小)作为key参数调用max:
import os
def get_files(target):
    """Yield ``(filename, size_in_bytes)`` for every file under *target*.

    The tree is walked with ``os.walk``; sub-directories that are mount
    points are pruned so the walk does not descend into them. Being a
    generator, the result can be fed straight into ``max`` or ``sorted``.

    Args:
        target: Path of the directory to scan.

    Yields:
        Tuples of the joined file path and its size in bytes. Files whose
        size cannot be read (for example broken symlinks) are skipped.
    """
    for root, dirs, files in os.walk(target):
        # Prune in place: os.walk only skips directories that are removed
        # from this very list object.
        dirs[:] = [
            d for d in dirs
            if not os.path.ismount(os.path.join(root, d))
        ]
        for name in files:
            filename = os.path.join(root, name)
            try:
                # A single stat call; no separate exists() check that the
                # file could invalidate by disappearing in between.
                size = os.path.getsize(filename)
            except OSError:
                # Broken symlink, or the file vanished / became unreadable.
                continue
            yield filename, size
这样您就可以按自己喜欢的方式重复使用该函数:
In [5]: from operator import itemgetter
In [6]: max(get_files("."),key=itemgetter(1))
Out[6]:
('./node_modules/browser-sync/node_modules/socket.io/node_modules/socket.io-parser/bg.gif',
1277113)
如果您想按字母顺序按名称对文件进行排序:
# Tuples compare element by element, so the (filename, size) pairs are
# ordered by filename first.
sorted(get_files("path"))
按大小排序:
# itemgetter(1) selects the size field of each (filename, size) pair;
# the result is in ascending order, so the largest file comes last.
sorted(get_files("path"), key=itemgetter(1))
答案 1 :(得分:0)
这是一种冗长的方法。首先,我创建了一个文件名和文件大小tuples的列表。然后我遍历列表并保存最大的文件名称和大小。
import os
def list_file_sizes(directory=os.curdir):
    """Return ``(name, size_in_bytes)`` for each regular file in *directory*.

    Only the directory itself is inspected; sub-directories are not
    descended into.

    Args:
        directory: Directory to list. Defaults to the current directory.

    Returns:
        A list of ``(name, size)`` tuples in ``os.listdir`` order, where
        *name* is the bare entry name (without *directory*).
    """
    sizes = []
    for name in os.listdir(directory):
        # Join with the directory so the check also works when *directory*
        # is not the current working directory.
        path = os.path.join(directory, name)
        if os.path.isfile(path):
            sizes.append((name, os.path.getsize(path)))
    return sizes


fileSizeTupleList = list_file_sizes(os.curdir)
# The default keeps both names bound when there are no files at all, so the
# print below cannot fail with NameError.
largestFile, largestSize = max(
    fileSizeTupleList, key=lambda entry: entry[1], default=(None, 0)
)
print(largestFile, largestSize)
这是一种递归方法:
import os
def walk_file_sizes(top=os.curdir):
    """Return ``(path, size_in_bytes)`` for every file under *top*.

    The tree is walked recursively with ``os.walk``.

    Args:
        top: Root directory of the walk. Defaults to the current directory.

    Returns:
        A list of ``(path, size)`` tuples. *path* includes the directory
        part, so files with the same name in different directories stay
        distinguishable and the largest file can actually be located.
        Files whose size cannot be read are left out.
    """
    entries = []
    for root, _dirs, files in os.walk(top):
        for name in files:
            path = os.path.join(root, name)
            try:
                entries.append((path, os.path.getsize(path)))
            except OSError:
                # Dangling symlink, or the file vanished during the walk:
                # skip it rather than abort the whole scan.
                continue
    return entries


fileSizeTupleList = walk_file_sizes(os.curdir)
# The default keeps both names bound when no file was found, so the print
# below cannot fail with NameError.
largestFile, largestSize = max(
    fileSizeTupleList, key=lambda entry: entry[1], default=(None, 0)
)
print(largestFile, largestSize)