我正在运行一个替换文件名中的德语变音符号的脚本。我需要执行此操作的文件超过1700个,但是在脚本运行一段时间后,我收到的错误表明打开的文件太多。任何人有任何想法如何解决这个问题?非常感谢您的反馈!
代码:
# -*- coding: utf-8 -*-
''' Script replaces all umlauts in filenames within a root directory and its subdirectories with the English
equivalent (ie. ä replaced with ae, Ä replaced with Ae).'''
import os
import itertools
import logging
from itertools import groupby
##workspace = u'G:\\Dvkoord\\GIS\\TEMP\\Tle\\Scripts\\Umlaut'
workspace = u'G:\\Gis\\DATEN'
log = 'Umlauts.log'
logPath = r"G:\Dvkoord\GIS\TEMP\Tle\Scripts\Umlaut\Umlauts.log"
logMessageFormat = '%(asctime)s - %(levelname)s - %(message)s'
def GetFilepaths(directory):
"""Function returns a list of file paths in a directory tree using os.walk. Parameter: directory
"""
file_paths = []
for root, directories, files in os.walk(directory):
for filename in files:
filepath = os.path.join(root, filename)
file_paths.append(filepath)
## file_paths = list(set(file_paths))
return file_paths
def uniq(input):
output = []
for x in input:
if x not in output:
output.append(x)
return output
def Logging(logFile, logLevel, destination, textFormat, comment):
"""Function writes a log file. Parameters: logFile (name the log file w/extension),
logLevel (DEBUG, INFO, etc.), destination (path under which the log file will be
saved including name and extension), textFormat (how the log text will be formatted)
and comment.
"""
# logging
logger = logging.getLogger(__name__)
# set log level
logger.setLevel(logLevel)
# create a file handler for the log -- unless a separate path is specified, it will output to the directory where this script is stored
logging.FileHandler(logFile)
handler = logging.FileHandler(destination)
handler.setLevel(logLevel)
# create a logging format
formatter = logging.Formatter(textFormat)
handler.setFormatter(formatter)
# add the handlers to the logger
logger.addHandler(handler)
logger.info(comment)
def main():
# dictionary of umlaut unicode representations (keys) and their replacements (values)
umlautDictionary = {
u'Ä': 'Ae',
u'Ö': 'Oe',
u'Ü': 'Ue',
u'ä': 'ae',
u'ö': 'oe',
u'ü': 'ue',
u'ß': 'ss'
}
dataTypes = [".CPG",
".dbf",
".prj",
".sbn",
".sbx",
".shp",
".shx",
".shp.xml",
".lyr"]
# get file paths in root directory and subfolders
filePathsList = GetFilepaths(workspace)
# put all filepaths with an umlaut in filePathsUmlaut list
filePathsUmlaut = []
for fileName in filePathsList:
## print fileName
for umlaut in umlautDictionary:
if umlaut in os.path.basename(fileName):
for dataType in dataTypes:
if dataType in fileName:
## print fileName
filePathsUmlaut.append(fileName)
# remove duplicate paths from filePathsUmlaut
uniquesUmlauts = uniq(filePathsUmlaut)
# create a dictionary for umlaut translation
umap = {
ord(key):unicode(val)
for key, val in umlautDictionary.items()
}
# use translate and umap dictionary to replace umlauts in file name and put them in the newFilePaths list
# without changing any of the umlauts in folder names or upper directories
newFilePaths = []
for fileName in uniquesUmlauts:
pardir = os.path.dirname(fileName)
baseName = os.path.basename(fileName)
newBaseFileName = baseName.translate(umap)
newPath = os.path.join(pardir, newBaseFileName)
newFilePaths.append(newPath)
newFilePaths = uniq(newFilePaths)
# create a dictionary with the old umlaut path as key and new non-umlaut path as value
dictionaryOldNew = dict(itertools.izip(uniquesUmlauts, newFilePaths))
# rename old file (key) as new file (value)
for files in uniquesUmlauts:
for key, value in dictionaryOldNew.iteritems():
if key == files:
comment = '%s'%files + ' wurde als ' '%s'%value + ' umbenannt.'
print comment
if os.path.exists(value):
os.remove(value)
os.rename(files, value)
Logging(log, logging.INFO, logPath, logMessageFormat, comment)
if __name__ == '__main__':
main()
答案 0 :(得分:5)
我认为问题是你的Logging
功能。每次登录时,您都要创建一个新的FileHandler
并将其添加到处理程序集中,并为重命名的每个文件执行此操作,这样您就可以快速达到打开文件描述符的限制。配置您的记录器一次,然后多次使用它,每次使用时都不要配置它。
请注意,Logging
中可能不会引发异常;在Windows上删除文件涉及将其打开以进行删除,因此您可以使用记录器最大限度地打开文件,然后在尝试删除文件时失败。