我有大量文件,我需要遍历它们并搜索一些字符串,当找到字符串时,文件被复制到新文件夹中,否则将其关闭。
以下是示例代码:
import os
import stringsfilter
def apply_filter(path, filter_dict):
dirlist = os.listdir(path)
for directory in dirlist:
pwd = path + '/' + directory
filelist = os.listdir(pwd)
for filename in filelist:
if filename.split('.')[-1] == "stats":
sfilter = stringsfilter.StringsFilter(pwd, filename, filter_dict["strings"])
sfilter.find_strings_and_move()
这里是stringsfilter.py:
import main
import codecs
import os
import shutil
class StringsFilter:
strings = None
def __init__(self, filepath, filename, strings):
self.filepath = filepath
self.filename = filename
self.strings = strings
self.logger = main.get_module_logger("StringsFilter")
self.file_desc = codecs.open(self.filepath + '/' + self.filename, 'r', encoding="utf-8-sig")
self.logger.debug("[-] Strings: " + str(self.strings))
self.logger.debug("[-] Instantiating class Strings Filter, filename: %s " % self.filename)
def find_strings_and_move(self):
for line in self.file_desc.readlines():
for string in self.strings:
if string in line:
self.move_to_folder()
return
self.close()
def move_to_folder(self):
name = self.filename.split('.')[0]
os.mkdir(self.filepath + '/' + name)
shutil.copyfile(self.filepath + '/' + self.filename,
self.filepath + '/' + name + '/' + self.filename)
self.close()
def close(self):
if self.file_desc:
self.logger.debug("[-] Closing file %s" % self.filename)
self.file_desc.close()
main.py:
import logging
def get_module_logger(name):
# create logger
logger = logging.getLogger(name)
# set logging level to log everything
logger.setLevel(logging.DEBUG)
# create file handler which logs everything
fh = logging.FileHandler('files.log')
fh.setLevel(logging.DEBUG)
# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlersi
formatter = logging.Formatter('[%(asctime)s] [%(name)-17s] [%(levelname)-5s] - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)
# add the handlers to the logger
logger.addHandler(fh)
logger.addHandler(ch)
return logger
在日志中我可以看到以下内容:
[2016-10-13 10:07:07,002] [StringsFilter ] [DEBUG] - [-] Strings: ['DEVICE_PROBLEM']
[2016-10-13 10:07:07,002] [StringsFilter ] [DEBUG] - [-] Instantiating class Strings Filter, filename: file1.stats
[2016-10-13 10:07:07,003] [StringsFilter ] [DEBUG] - [-] Closing file file1.stats
[2016-10-13 10:07:07,003] [StringsFilter ] [DEBUG] - [-] Strings: ['DEVICE_PROBLEM']
[2016-10-13 10:07:07,003] [StringsFilter ] [DEBUG] - [-] Strings: ['DEVICE_PROBLEM']
[2016-10-13 10:07:07,004] [StringsFilter ] [DEBUG] - [-] Instantiating class Strings Filter, filename: file2.stats
[2016-10-13 10:07:07,004] [StringsFilter ] [DEBUG] - [-] Instantiating class Strings Filter, filename: file2.stats
[2016-10-13 10:07:07,004] [StringsFilter ] [DEBUG] - [-] Closing file file2.stats
[2016-10-13 10:07:07,004] [StringsFilter ] [DEBUG] - [-] Closing file file2.stats
[2016-10-13 10:07:07,005] [StringsFilter ] [DEBUG] - [-] Strings: ['DEVICE_PROBLEM']
[2016-10-13 10:07:07,005] [StringsFilter ] [DEBUG] - [-] Strings: ['DEVICE_PROBLEM']
[2016-10-13 10:07:07,005] [StringsFilter ] [DEBUG] - [-] Strings: ['DEVICE_PROBLEM']
[2016-10-13 10:07:07,005] [StringsFilter ] [DEBUG] - [-] Instantiating class Strings Filter, filename: file3.stats
[2016-10-13 10:07:07,005] [StringsFilter ] [DEBUG] - [-] Instantiating class Strings Filter, filename: file3.stats
[2016-10-13 10:07:07,005] [StringsFilter ] [DEBUG] - [-] Instantiating class Strings Filter, filename: file3.stats
[2016-10-13 10:07:07,006] [StringsFilter ] [DEBUG] - [-] Closing file file3.stats
[2016-10-13 10:07:07,006] [StringsFilter ] [DEBUG] - [-] Closing file file3.stats
[2016-10-13 10:07:07,006] [StringsFilter ] [DEBUG] - [-] Closing file file3.stats
接下来,似乎每次迭代, init 的每个语句再次完成,直到打开的文件太多而程序以
结束OSError: [Errno 24] Too many files open
我无法理解为什么每次创建实例时都会多次调用来自 init 的语句。
答案 0 :(得分:0)
您多次记录相同内容的原因:
每次调用main.get_module_logger("StringsFilter")
时,您都会从logger.addHandler(...)
返回的相同的记录器上调用logging.getLogger(name)
,因此您可以在一个记录器中获得多个处理程序。更好地制作模块级记录器
import ...
LOG = main.get_module_logger("StringsFilter")
class StringsFilter:...
关于打开文件,我没有看到原因,但请考虑在with open(filename) as f:
中使用find_strings_and_move()
语法。
LOG = main.get_module_logger("StringsFilter")
class StringsFilter:
strings = None
def __init__(self, filepath, filename, strings):
self.filepath = filepath
self.filename = filename
self.strings = strings
LOG.debug("[-] Strings: " + str(self.strings))
LOG.debug("[-] Instantiating class Strings Filter, filename: %s " % self.filename)
def find_strings_and_move(self):
with open(self.filepath + '/' + self.filename, 'r') as file_desc:
lines = file_desc.readlines()
for line in lines:
for string in self.strings:
if string in line:
self.move_to_folder()
return
def move_to_folder(self):
name = self.filename.split('.')[0]
os.mkdir(self.filepath + '/' + name)
shutil.copyfile(self.filepath + '/' + self.filename,
self.filepath + '/' + name + '/' + self.filename)
通过这种方式,您可以确保文件已关闭1)之前移动2)始终