Question

我一直在编写一个小的Python脚本，使用subprocess模块和辅助函数执行一些shell命令：

import subprocess as sp
def run(command, description):
    """Run a shell command, report its status, and return its exit code.

    The description is written to stderr, left-justified in a 65-character
    column and without a trailing newline. Once the command has finished,
    'Done' or 'Failed' and the elapsed whole seconds are printed to stdout.

    Args:
        command: Command line passed to the shell (``shell=True``).
        description: Label shown in front of the status.

    Returns:
        The exit code of the command; 0 is reported as 'Done', anything
        else as 'Failed'.
    """
    start = datetime.datetime.now()
    sys.stderr.write('%-65s' % description)
    s = sp.Popen(command, shell=True, stderr=sp.PIPE, stdout=sp.PIPE)
    # communicate() reads both pipes to EOF and waits for the process, so
    # returncode is set afterwards. The captured output is not used yet.
    out, err = s.communicate()
    end = datetime.datetime.now()
    duration = end - start
    status = 'Done' if s.returncode == 0 else 'Failed'
    # A single parenthesised argument prints the same on Python 2 and 3.
    print('%s (%d seconds)' % (status, duration.seconds))
    # The original documented a return code but never returned one.
    return s.returncode

以下行读取标准输出和错误：

    s=sp.Popen(command, shell=True, stderr=sp.PIPE, stdout=sp.PIPE)
    out,err=s.communicate()

如您所见，未使用stdout和stderr。假设我想以格式化的方式将输出和错误消息写入日志文件，例如：

[STDOUT: 2011-01-17 14:53:55] <message>
[STDERR: 2011-01-17 14:53:56] <message>

我的问题是，最Pythonic（最符合Python风格）的做法是什么？我想到了三个选择：

继承文件对象并覆盖write方法。
使用一个实现了write方法的委托类。
以某种方式连接到PIPE。

更新：参考测试脚本

我正在使用此脚本检查结果，保存为test.py：

#!/usr/bin/python
import sys

# Write one marker line to each standard stream, flushing each stream
# immediately after its write.
for stream, marker in ((sys.stdout, 'OUT\n'), (sys.stderr, 'ERR\n')):
    stream.write(marker)
    stream.flush()

有什么想法吗？

Answer 1

1和2是合理的解决方案，但是覆盖write（）是不够的。

问题在于，Popen需要把文件句柄附加到子进程上，因此纯Python的文件对象不起作用，它们必须是操作系统级别的句柄。要解决这个问题，你需要一个拥有操作系统级文件句柄的Python对象。我能想到的唯一办法是使用管道，这样就有一个操作系统级的文件句柄可供写入。但接下来你还需要另一个线程守着该管道并对其轮询，把读到的内容记录下来。（所以严格来说，这更像是选项2的一种实现，因为它把工作委托给了logging模块。）

话虽如此，代码如下：

import io
import logging
import os
import select
import subprocess
import time
import threading

# Name of the file the root logger is configured to write to.
LOG_FILENAME = 'output.log'
# Configure the root logger once: send records to LOG_FILENAME and let
# everything from DEBUG upwards through.
logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)

class StreamLogger(io.IOBase):
    """File-like object that forwards a child process's output to ``logging``.

    ``fileno()`` exposes the write end of an OS-level pipe, so an instance can
    be passed as ``stdout``/``stderr`` to ``subprocess.Popen``. A background
    thread polls the read end and logs every complete line at ``level``
    through ``logging.log``.

    Instances are context managers (inherited from ``io.IOBase``); leaving
    the ``with`` block calls ``close()``.

    Args:
        level: Logging level used for every line (e.g. ``logging.INFO``).
        poll_interval: Seconds the reader thread sleeps between polls of the
            pipe. Defaults to one second.
    """

    def __init__(self, level, poll_interval=1.0):
        super(StreamLogger, self).__init__()
        self.level = level
        self.poll_interval = poll_interval
        self.pipe = os.pipe()
        # Set the flag before the thread starts so that a close() issued
        # before the thread is scheduled still finds the attribute.
        self._run = True
        self.thread = threading.Thread(target=self._flusher)
        self.thread.start()

    def _drain(self, buf):
        """Read everything currently in the pipe and log the complete lines.

        Args:
            buf: Bytes left over from the previous call (an unfinished line).

        Returns:
            The bytes after the last newline, to be passed to the next call.
        """
        read_fd = self.pipe[0]
        # A zero timeout turns select() into a pure poll, so os.read() is only
        # called when data is available and cannot block. Looping until the
        # pipe is empty avoids throttling the child to 1024 bytes per poll.
        while select.select([read_fd], [], [], 0)[0]:
            chunk = os.read(read_fd, 1024)
            if not chunk:
                # End of file: nothing more will ever arrive.
                break
            buf += chunk
            while b'\n' in buf:
                data, buf = buf.split(b'\n', 1)
                # 'replace' keeps undecodable bytes from killing the thread.
                self.write(data.decode('utf-8', 'replace'))
        return buf

    def _flusher(self):
        """Thread body: poll the pipe until close() clears the run flag."""
        buf = b''
        while self._run:
            buf = self._drain(buf)
            time.sleep(self.poll_interval)
        # Pick up whatever was written between the last poll and close(),
        # then emit a final line that was not newline-terminated.
        buf = self._drain(buf)
        if buf:
            self.write(buf.decode('utf-8', 'replace'))

    def write(self, data):
        """Log ``data`` at this stream's level; returns None."""
        return logging.log(self.level, data)

    def fileno(self):
        """Return the write end of the pipe, for use by ``subprocess.Popen``."""
        return self.pipe[1]

    def close(self):
        """Stop the reader thread, log pending data and release the pipe.

        Safe to call more than once; later calls do nothing.
        """
        if self.closed:
            return
        # getattr: close() may run from __del__ after a failed __init__.
        thread = getattr(self, 'thread', None)
        if thread is not None:
            self._run = False
            thread.join()
            os.close(self.pipe[0])
            os.close(self.pipe[1])
        super(StreamLogger, self).close()

因此，该类会创建一个操作系统级别的管道，Popen可以把子进程的stdin/stdout/stderr附加到它上面。它还会启动一个线程，每秒轮询一次该管道的另一端，把读到的内容交给logging模块记录。

可能这个类应该为完整性实现更多的东西，但无论如何它都适用于这种情况。

示例代码：

# Wait for the child to exit before leaving the with-blocks: leaving them
# closes the loggers, and output produced after that would not be logged.
with StreamLogger(logging.INFO) as out:
    with StreamLogger(logging.ERROR) as err:
        subprocess.Popen("ls", stdout=out, stderr=err, shell=True).wait()

output.log最终如下：

INFO:root:output.log
INFO:root:streamlogger.py
INFO:root:and
INFO:root:so
INFO:root:on

使用Python 2.6,2.7和3.1测试。

我认为选项1和3的任何实现都需要使用类似的技术。这有点复杂，但除非你能让Popen执行的命令自己正确地记录日志，否则我没有更好的主意。

Answer 2

我建议选项3，使用logging标准库包。在这种情况下，我会说另外两个是矫枉过正的。

Answer 3

这里使用了Adam Rosenfield的make_async和read_async。我最初的答案使用select.epoll，因此仅适用于Linux；现在改用select.select，它应该可以在Unix或Windows下运行。

这会将子进程的输出记录到/tmp/test.log：

import logging
import subprocess
import shlex
import select
import fcntl
import os
import errno

def make_async(fd):
    # https://stackoverflow.com/a/7730201/190597
    '''Switch a file descriptor to non-blocking mode by setting O_NONBLOCK.'''
    current_flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    # Keep the flags that are already set and add O_NONBLOCK to them.
    fcntl.fcntl(fd, fcntl.F_SETFL, current_flags | os.O_NONBLOCK)

def read_async(fd):
    # https://stackoverflow.com/a/7730201/190597
    '''Read whatever is available from a non-blocking file object.

    Returns the data from ``fd.read()``, or an empty string when nothing is
    available yet. "Nothing available" is either an ``IOError`` with errno
    ``EAGAIN`` or a ``None`` result from ``read()``. Any other ``IOError``
    propagates to the caller.
    '''
    try:
        data = fd.read()
    except IOError as e:  # "as" form is valid on Python 2.6+ and Python 3
        if e.errno != errno.EAGAIN:
            # Bare raise keeps the original traceback.
            raise
        return ''
    # Python 3 buffered readers report "no data yet" as None instead of
    # raising EAGAIN; map it to the same empty result.
    return '' if data is None else data

def log_process(proc, stdout_logger, stderr_logger):
    """Relay a child's stdout and stderr to two loggers until it exits.

    Both pipes of ``proc`` are switched to non-blocking mode. Each chunk read
    from a pipe is passed to ``info()`` on the matching logger; chunks that
    contain only whitespace are skipped.
    """
    streams = [proc.stdout, proc.stderr]
    logger_for = dict(zip(streams, (stdout_logger, stderr_logger)))

    def emit_pending(ready):
        for stream in ready:
            chunk = read_async(stream)
            if not chunk.strip():
                continue
            logger_for[stream].info(chunk)

    for stream in streams:
        make_async(stream)

    running = True
    while running:
        # Block until at least one of the pipes is readable.
        ready, _, _ = select.select(streams, [], [])
        emit_pending(ready)
        if proc.poll() is not None:
            # The process has exited: read both pipes once more in case
            # output arrived after the last read.
            emit_pending(streams)
            running = False

if __name__=='__main__':
    # One file handler, shared by both loggers, opened in 'w' mode.
    handler = logging.FileHandler('/tmp/test.log','w')
    handler.setFormatter(
        logging.Formatter('[%(name)s: %(asctime)s] %(message)s'))

    # The logger name appears in each record through %(name)s.
    stdout_logger, stderr_logger = [
        logging.getLogger(name) for name in ('STDOUT', 'STDERR')]
    for stream_logger in (stdout_logger, stderr_logger):
        stream_logger.setLevel(logging.DEBUG)
        stream_logger.addHandler(handler)

    command = shlex.split('ls -laR /tmp')
    proc = subprocess.Popen(command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    log_process(proc, stdout_logger, stderr_logger)

Answer 4

1和2不起作用。以下是该原则的实现：

import subprocess
import time

# Obtain the concrete class of the objects returned by open() by opening a
# scratch file for writing and taking the type of the result.
FileClass = type(open('tmptmp123123123.tmp', 'w'))

class WrappedFile(FileClass):
    """File subclass whose write() prefixes each line with a title and time.

    Every line passed to ``write()`` is stored as
    ``[<title>: <timestamp>] <line>`` followed by a newline.

    Args:
        name: Path of the file to open.
        mode: Mode passed to the base class constructor.
        buffering: Buffering argument for the base class; omitted from the
            call when None.
        title: Label used in the prefix; falls back to ``name``.
    """
    TIMETPL = "%Y-%m-%d %H:%M:%S"
    TEMPLATE = "[%s: %s] "

    def __init__(self, name, mode='r', buffering=None, title=None):
        self.title = title or name

        if buffering is None:
            super(WrappedFile, self).__init__(name, mode)
        else:
            super(WrappedFile, self).__init__(name, mode, buffering)

    def write(self, s):
        """Write ``s`` with a ``[title: timestamp] `` prefix on every line.

        Empty input writes nothing. Every emitted line ends with a newline,
        including a final line that was not newline-terminated.
        """
        if not s:
            return
        stamp = time.strftime(self.TIMETPL)
        # TEMPLATE already ends with a space, so the line is appended
        # directly; joining with "%s %s" produced a double space.
        prefix = self.TEMPLATE % (self.title, stamp)
        lines = s.split('\n')
        # Input ending in '\n' yields a trailing empty item. Dropping it
        # avoids an extra line that contains only the prefix.
        if lines[-1] == '':
            lines.pop()
        super(WrappedFile, self).write(
            "".join(prefix + line + "\n" for line in lines))

它不起作用的原因是Popen从不调用stdout.write。当我们调用其write方法时，包装文件将正常工作，如果传递给Popen，甚至会写入，但写入将发生在较低层，跳过write方法。

Answer 5

这个简单的解决方案对我有用：

import sys
import datetime
import tempfile
import subprocess as sp
def run(command, description):
    """Run a shell command, echo its output to stderr, and return its exit code.

    The child's stdout and stderr are merged and collected in a spooled
    temporary file. After the command finishes, every collected line is
    written to stderr prefixed with 'logging this:', followed by a
    'Done'/'Failed' status line with the elapsed whole seconds.

    Args:
        command: Command line passed to the shell (``shell=True``).
        description: Label written to stderr before the command starts.

    Returns:
        The exit code of the command; 0 is reported as 'Done', anything
        else as 'Failed'.
    """
    with tempfile.SpooledTemporaryFile(8*1024) as so:
        # sys.stderr.write with an explicit newline produces the same text as
        # the Python 2 "print >> sys.stderr" form and also runs on Python 3.
        sys.stderr.write('%-65s\n' % description)
        start = datetime.datetime.now()
        retcode = sp.call(command, shell=True, stderr=sp.STDOUT, stdout=so)
        end = datetime.datetime.now()
        # The child wrote through the file descriptor; rewind to read it back.
        so.seek(0)
        for line in so.readlines():
            text = line.rstrip()
            if not isinstance(text, str):
                # On Python 3 the binary spool file yields bytes.
                text = text.decode('utf-8', 'replace')
            sys.stderr.write('logging this: %s\n' % text)
        duration = end - start
        status = 'Done' if retcode == 0 else 'Failed'
        sys.stderr.write('%s (%d seconds)\n' % (status, duration.seconds))
    # The original documented a return code but never returned one.
    return retcode

# Source text of the reference script. It writes 'OUT' to stdout and 'ERR' to
# stderr, flushing after each write. The raw string keeps the backslash-n
# sequences literal so that they end up as escapes in the generated file.
REF_SCRIPT = r"""#!/usr/bin/python
import sys

sys.stdout.write('OUT\n')
sys.stdout.flush()
sys.stderr.write('ERR\n')
sys.stderr.flush()
"""

# File name the reference script is written to before it is executed.
SCRIPT_NAME = 'refscript.py'

if __name__ == '__main__':
    # Write the reference script to disk, then execute it through run().
    with open(SCRIPT_NAME, 'w') as handle:
        handle.write(REF_SCRIPT)
    run('python %s' % SCRIPT_NAME, 'Reference script')

装饰\委托File对象以添加功能

5 个答案: