How to properly close a file from a signal handler

Asked: 2018-12-19 11:28:34

Tags: python thread-safety signals

I'm using the following code to monitor file access from a running job.
When the job is stopped, my code receives a SIGINT.
As this job is very intensive, its IO is buffered; I can't make those writes unbuffered, and I want a precise log.
So I tried to catch SIGINT and flush the file before shutting down my script, but I end up with:

RuntimeError: reentrant call inside <_io.BufferedWriter name=

From several articles I've read, I understand that it's not safe to call write/print/flush from inside a signal handler, because buffered I/O is not re-entrant, which is exactly what the RuntimeError above is complaining about.
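
For context, a stripped-down sketch of the pattern that triggers that error is below (the file name is a placeholder, and whether it actually fires depends on the signal landing while the main loop is in the middle of a buffered write):

import signal
import sys
import time

log = open("demo.log", "w")   # placeholder file, ordinary buffered text I/O

def handler(signum, frame):
    # If SIGINT lands while the loop below is inside log.write(), this flush()
    # re-enters the same BufferedWriter and raises the RuntimeError above.
    log.flush()
    log.close()
    sys.exit(0)

signal.signal(signal.SIGINT, handler)

while True:
    log.write("tick\n")
    time.sleep(0.01)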

My question is: how can I ensure that my file is written out properly before shutting down the script?

Here's a simpler version of my script:

import signal
import sys
import os
import time
from time import strftime
import inotify.adapters

separator = ';'
jump = '\n'
logfile_pointer = open("path/to/log/file", 'w')

#Try to close nicely everything
def signal_handler(signal, frame):
    logfile_pointer.flush()
    logfile_pointer.close()
    sys.exit(0)

#Register signal handler
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGHUP, signal_handler)

eventHandler = inotify.adapters.InotifyTrees(["/folder/one","/folder/two"])

for event in eventHandler.event_gen():
    if event is not None:
        (_, type_names, path, filename) = event
        try:
            timestamp = '%.2f' % (time.time())
            filepath = path + '/' + filename
            logfile_pointer.write("{}{}{}{}{}{}{}{}".format(timestamp, separator, filepath, separator, type_names[0], separator, os.path.getsize(filepath), jump))
        except os.error as e:
            pass

2 Answers:

Answer 0 (score: 2)

The typical approach here is for the signal handler to set a flag and return without exiting. The main loop checks that flag and, when it is set, cleans up and exits.

In this case, that means you need the event producer to yield periodically. With PyInotify this can be done by setting a short timeout. It ends up looking something like:

[...]

exit_requested = False

def signal_handler(signal, frame):
    # Perhaps check which signal was received...
    global exit_requested
    exit_requested = True

[...]

for event in eventHandler.event_gen(timeout_s=1):
    if exit_requested:
        # Clean up and exit
        break
    if event:
        ...

When event_gen returns None because of the timeout, inotify events that occur before the next call to event_gen are queued and are not lost: events are only consumed when they are read from the inotify file descriptor, and the event handler here keeps that descriptor open.
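
Putting the pieces together, a minimal self-contained version of this approach could look like the following sketch (the watched folders, the log path and the CSV layout are taken from the question; the outer while loop is there so the flag is re-checked even when event_gen stops yielding after the timeout):

import os
import sys
import time
import signal
import inotify.adapters

separator = ';'
exit_requested = False
logfile = open("path/to/log/file", 'w')   # placeholder path, as in the question

def signal_handler(signum, frame):
    # Only set a flag: no I/O happens here, so nothing re-entrant can occur.
    global exit_requested
    exit_requested = True

signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGHUP, signal_handler)

watcher = inotify.adapters.InotifyTrees(["/folder/one", "/folder/two"])

while not exit_requested:
    # With timeout_s set, event_gen yields None (and eventually returns) when
    # no events arrive, so the flag is re-checked at least every second.
    for event in watcher.event_gen(timeout_s=1):
        if exit_requested:
            break
        if event is None:
            continue
        (_, type_names, path, filename) = event
        try:
            filepath = path + '/' + filename
            logfile.write("{:.2f}{}{}{}{}{}{}\n".format(
                time.time(), separator, filepath, separator,
                type_names[0], separator, os.path.getsize(filepath)))
        except OSError:
            pass

# Back in normal (non-handler) context, flushing and closing is safe.
logfile.flush()
logfile.close()
sys.exit(0)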

Answer 1 (score: 0)

I had several problems to solve, one of them being how to stop the script cleanly, since Python has some peculiar threading semantics. Here is my solution.
First, define a thread that acts as the inotify watcher:

import os
import sys
import time
import signal
import argparse
import inotify.adapters
from time import strftime
from threading import Thread
from argparse import RawTextHelpFormatter


class EventMonitor(Thread):
    separator = ';'
    jump = '\n'
    def __init__(self, folders, logfile, buffer_size=-1):
        Thread.__init__(self)
        self.eventHandler = None
        self.stop = False
        # buffer_size=-1 keeps Python's default buffering for the log file
        self.logfile = open(logfile, 'w', buffering=buffer_size)
        self.line_count = 0
        self.alive = True
        self.eventHandler = inotify.adapters.InotifyTrees(folders)


    def run(self):
        # Main loop: watch for events until stopped() is called.
        while not self.stop:
            for event in self.eventHandler.event_gen(timeout_s=3):
                try:
                    if event is not None:
                        (_, type_names, path, filename) = event
                        timestamp = '%.2f' % (time.time())
                        filepath = path + '/' + filename
                        self.logfile.write("{}{}{}{}{}{}{}{}".format(timestamp, self.separator, filepath, self.separator, type_names[0], self.separator, os.path.getsize(filepath), self.jump))
                except os.error:
                    pass

        # Drain loop: one last pass with a shorter timeout to pick up the
        # events that were queued while we were asked to stop.
        for event in self.eventHandler.event_gen(timeout_s=1):
            try:
                if event is not None:
                    (_, type_names, path, filename) = event
                    timestamp = '%.2f' % (time.time())
                    filepath = path + '/' + filename
                    self.logfile.write("{}{}{}{}{}{}{}{}".format(timestamp, self.separator, filepath, self.separator, type_names[0], self.separator, os.path.getsize(filepath), self.jump))
            except os.error:
                pass
        self.logfile.flush()
        self.logfile.close()
        self.alive = False


    def stopped(self):
        if not self.stop:
            self.stop = True
        else:
            print("Event Monitoring is already disabled")

    def isAlive(self):
        return self.alive
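
Before the full main script, here is the bare lifecycle of that thread, with the argument parsing and the CPU monitoring stripped out (the folder list and the log path are placeholders):

import time
import signal
from EventMonitor import EventMonitor

monitor = EventMonitor(["/folder/one", "/folder/two"], "/tmp/event-log.txt")

def signal_handler(signum, frame):
    # Only ask the thread to stop; it drains the pending events and closes
    # its own log file at the end of run().
    monitor.stopped()

signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGHUP, signal_handler)

monitor.start()
while monitor.isAlive():   # isAlive() turns False once the log file is closed
    time.sleep(5)
monitor.join()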

Then, in my main script:

import os
import sys
import time
import signal
import argparse
import traceback
from time import strftime
from CPUMonitor import CPUMonitor
from EventMonitor import EventMonitor
from argparse import RawTextHelpFormatter


#define argument
parser = argparse.ArgumentParser(description='attache spies on the folders given in argument and generates a csv log file containing a list of events on files. The file is formatted like this:\ntimestamp;fullpath;event;size\n123456897.25;/path/file;IN_OPEN;0\n123456899.25;/path/file;IN_CLOSE;1234\n.....\nFor more info about inotify events => `man inotify`', formatter_class=RawTextHelpFormatter)
parser.add_argument("-l", "--log-folder", type=str, help="Destination folder for the logs. If no value is given, /tmp is used", default='/tmp')
parser.add_argument("-e", "--event", help="enable file event watch", action="store_true")
parser.add_argument('folders', metavar='folderpath', type=str, help='a list of folder paths to spy on; if -e is not set, this is ignored.', nargs='*', default=[os.getcwd()])
args = parser.parse_args()

#Try to close nicely everything
def signal_handler(signal, frame):
    if CPU_thread is not None:
        CPU_thread.stopped()
    if Event_thread is not None:
        Event_thread.stopped()
    print('Kill signal received.{}CPU and Event monitoring stopped.{}'.format(jump, jump))
    sys.exit(0)
#Register signal handler
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGHUP, signal_handler)

try:
    #define variable
    separator = ';'
    jump = '\n'
    logDest = ''
    go = True
    Event_logfile = None
    Event_logfile_debug = None
    Event_thread = None
    CPU_thread = None
    jobname = ''
    check_message = ''

    if not os.path.isdir(args.log_folder):
        go = False
        check_message += "/!\\ Log folder {} is not a directory. Monitoring won't start{}".format(args.log_folder, jump)
    elif not os.access(args.log_folder, os.W_OK | os.X_OK):
        go = False
        check_message += "/!\\ Log folder {} is not writable. Monitoring won't start{}".format(args.log_folder, jump)
    else:
        check_message += "Log folder is a proper directory and can be RW.{}".format(jump)

    if not go:
        print(check_message)
        sys.exit(-2)

    if go:
        event_logfile = args.log_folder + '/Event_' + os.environ['JOB_ID'] + '_' + strftime("%Y-%m-%d_%H:%M:%S") + '-log.txt'
        print('Event logfile: {}{}'.format(event_logfile, jump))
        print('Start monitoring of the events on: {}{}'.format(args.folders, jump))
        Event_thread = EventMonitor(args.folders, event_logfile)
        Event_thread.start()
    else:
        print("Error detected, monitoring hasn't started{}".format(jump))
        sys.exit(-4)

    while Event_thread is not None and Event_thread.isAlive():
        time.sleep(5)

    if Event_thread is not None:
        Event_thread.join()

except Exception as error:
    traceback.print_exc()
    print(str(error))
    sys.exit(-5)  

In the thread, as long as it has not been asked to stop, it keeps looking for events and writing them to the file.
When stopped() is called, the current event loop times out after 3 seconds without events; I then run the event loop one last time with a shorter 1-second timeout, and once all queued events have been processed the thread stops and isAlive() returns False.
In the main program, when SIGINT or SIGHUP is received, it asks the thread to stop, and the Python script only exits once the thread has stopped properly.
This code works with both Python 2.7.15 and Python 3.6.7 and later; however, keep in mind that this is a simplified version of my code, it might not work as-is and may need some adjustments.

PS: Thanks to Stephen for his answer, it helped me a lot.