在下面的代码中,我试图 fork 出一个进程来运行命令,然后在子进程退出时捕获其结果。
最后,在一个全局变量上循环等待子进程结束,这样父进程就不会先于子进程退出,而命令的整体执行在某种程度上仍是非阻塞的。这段代码十次里有九次能正常工作,但偶尔会出错。
当 subprocess.Popen 返回 None 时就会出现错误,但我不确定这为什么会随机发生。
有人可以帮忙弄清楚这里出了什么问题吗?
计算机详细信息
[root@1-0-0-9 /]# uname -a
Linux 1-0-0-9 3.10.0-229.el7.x86_64 #1 SMP Thu Jan 29 18:37:38 EST 2015 x86_64 x86_64 x86_64 GNU/Linux
代码:
#!/usr/bin/env python
import os
import subprocess
import signal
import time
flag = False
class Utils(object):
    """Start a command with subprocess.Popen and report its result from a
    SIGCHLD handler.

    Relies on the module-level ``flag`` global: it is set to True when a
    command is started and back to False once the handler has printed the
    child's result.
    """

    def __init__(self):
        # Holds the subprocess.Popen object of the running command (despite
        # the name, not a bare pid); None while nothing is being tracked.
        self.child_pid = None
        # The handler is installed before any child exists, so from here on
        # it can be invoked at any moment, including while run_command is
        # still executing.
        signal.signal(signal.SIGCHLD, self.sigchld_handler)

    def sigchld_handler(self, *args):
        """Handle SIGCHLD: print the child's return code and output, then
        clear the tracked child and the global ``flag``.

        ``*args`` swallows the positional arguments the signal module passes
        to a handler (signal number and current stack frame).
        """
        print "handling SIGCHLD"
        # NOTE: self.child_pid is assigned only after Popen() has returned in
        # run_command. If the signal is delivered before that assignment,
        # p is None here and the communicate() call below raises
        # AttributeError.
        p = self.child_pid
        stdout_val = p.communicate()[0]
        retcode = p.returncode  # NOTE(review): assigned but never read
        print p.returncode, stdout_val.strip()
        self.child_pid = None
        global flag
        flag = False

    def run_command(self, cmnd, env=None, cwd=None, timeout=0):
        """Launch ``cmnd`` as a child process and return without waiting.

        cmnd    -- command line as one string; split on whitespace into argv.
        env     -- passed through to Popen as the child's environment.
        cwd     -- passed through to Popen as the child's working directory.
        timeout -- accepted but not used anywhere in this method.
        """
        global flag
        flag = True
        cmnd = cmnd.split()
        # stderr is merged into the stdout pipe; os.setsid runs in the child
        # before exec.
        self.child_pid =subprocess.Popen(cmnd, stdin=None, bufsize=-1, env=env,
                                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                         close_fds=True, cwd=cwd, preexec_fn=os.setsid)
        print "Invoked child process " , self.child_pid.pid
print "Running command .."
# Start the command; run_command returns right after Popen, and the result is
# printed later by Utils.sigchld_handler.
Utils().run_command("ls -lrt")
# Foreground loop that prints a progress marker every 1000 iterations.
for i in xrange(10000):
    if not i % 1000:
        print i
print flag
# Busy-wait until the SIGCHLD handler sets flag back to False, so this
# process does not exit before the child's result has been handled.
i = 0
while flag:
    i = i + 1
正确(所需)输出:
Running command ..
Invoked child process 9703
0
1000
2000
3000
4000
5000
handling SIGCHLD
0 total 52
drwxr-xr-x. 2 root root 6 Mar 13 2014 srv
drwxr-xr-x. 2 root root 6 Mar 13 2014 mnt
drwxr-xr-x. 2 root root 6 Mar 13 2014 media
drwxr-xr-x. 2 root root 6 Mar 13 2014 home
lrwxrwxrwx. 1 root root 7 Jan 9 2016 bin -> usr/bin
lrwxrwxrwx. 1 root root 9 Jan 9 2016 lib64 -> usr/lib64
lrwxrwxrwx. 1 root root 7 Jan 9 2016 lib -> usr/lib
lrwxrwxrwx. 1 root root 8 Jan 9 2016 sbin -> usr/sbin
drwxr-xr-x. 13 root root 4096 Jan 9 2016 usr
drwxr-xr-x. 4 root root 28 Nov 18 16:03 opt
dr-xr-xr-x. 4 root root 4096 Nov 18 16:06 boot
dr-xr-xr-x 178 root root 0 Nov 22 21:53 proc
dr-xr-xr-x 13 root root 0 Nov 22 21:53 sys
drwxr-xr-x. 22 root root 4096 Nov 22 21:53 var
drwxr-xr-x 19 root root 3060 Nov 22 21:53 dev
drwxr-xr-x. 124 root root 8192 Nov 22 21:53 etc
dr-xr-x---. 8 root root 4096 Nov 22 21:53 root
-rw-r--r-- 1 root root 573 Nov 22 22:15 a.py
-rw-r--r-- 1 root root 1108 Nov 22 22:15 cmnd.py
-rw-r--r-- 1 root root 1800 Nov 22 22:15 fork.py
-rw-r--r-- 1 root root 1368 Nov 22 22:15 ipc_pipe.py
-rw-r--r-- 1 root root 491 Nov 22 22:15 threads.py
drwxr-xr-x 35 root root 1000 Nov 22 22:35 run
drwxrwxrwt. 8 root root 4096 Nov 22 22:35 tmp
6000
7000
8000
9000
False
错误(失败案例):
Running command ..
handling SIGCHLD
handling SIGCHLD
handling SIGCHLD
Traceback (most recent call last):
File "cmnd.py", line 37, in <module>
Utils().run_command("ls -lrt")
File "cmnd.py", line 33, in run_command
close_fds=True, cwd=cwd, preexec_fn=os.setsid)
File "/usr/lib64/python2.7/subprocess.py", line 711, in __init__
errread, errwrite)
File "/usr/lib64/python2.7/subprocess.py", line 1296, in _execute_child
data = _eintr_retry_call(os.read, errpipe_read, 1048576)
File "/usr/lib64/python2.7/subprocess.py", line 478, in _eintr_retry_call
return func(*args)
File "cmnd.py", line 19, in sigchld_handler
stdout_val = p.communicate()[0]
AttributeError: 'NoneType' object has no attribute 'communicate'
答案 0 :(得分:4)
我能够复现这个 NoneType 错误,它显然是一个竞态条件。为了证明这一点,我导入了 traceback,并在信号处理程序中加入了 print traceback.print_stack(args[1])。堆栈跟踪显示:当信号到达时,Popen 仍在执行 os.fdopen,而 self.child_pid 尚未被赋值。
Running command ..
handling SIGCHLD
File "c.py", line 39, in <module>
Utils().run_command("ls -lrt")
File "c.py", line 35, in run_command
close_fds=True, cwd=cwd, preexec_fn=os.setsid)
File "/usr/lib/python2.7/subprocess.py", line 740, in __init__
self.stdout = os.fdopen(c2pread, 'rb', bufsize)
None
Traceback (most recent call last):
File "c.py", line 39, in <module>
Utils().run_command("ls -lrt")
File "c.py", line 35, in run_command
close_fds=True, cwd=cwd, preexec_fn=os.setsid)
File "/usr/lib/python2.7/subprocess.py", line 740, in __init__
self.stdout = os.fdopen(c2pread, 'rb', bufsize)
File "c.py", line 21, in sigchld_handler
stdout_val = p.communicate()[0]
AttributeError: 'NoneType' object has no attribute 'communicate'
我想不出用信号能很好地解决这个问题的办法。此外,您的代码还存在其他问题,例如,如果子进程的 stdout 或 stderr 管道被填满,就可能出现死锁。您可以改用后台线程来调用 Popen.communicate 而不是依赖信号,并使用 poll 和 wait 方法检查进程是否已经结束。
#!/usr/bin/env python
import os
import subprocess
import time
import threading
flag = False
class Utils(object):
    """Run one external command in the background and collect its output.

    run_command() starts the child and a worker thread that drains the
    child's pipe through Popen.communicate(). poll() and wait() report
    completion; once wait() has returned, ``out`` holds the combined
    stdout/stderr data and ``err`` is None (stderr is redirected into
    stdout).
    """

    def __init__(self):
        self.child = None    # subprocess.Popen of the started command
        self._thread = None  # worker thread running communicate()
        # Defined up front so that reading them before the worker has
        # finished yields None instead of raising AttributeError.
        self.out = None
        self.err = None

    def run_command(self, cmnd, env=None, cwd=None, timeout=0):
        """Start ``cmnd`` without blocking and return ``self``.

        cmnd    -- command line as one string; split on whitespace into argv
                   (no shell quoting rules are applied).
        env     -- passed through to Popen as the child's environment.
        cwd     -- passed through to Popen as the child's working directory.
        timeout -- accepted for interface compatibility; not used.
        """
        global flag
        flag = True
        argv = cmnd.split()
        self.child = subprocess.Popen(argv, stdin=None, bufsize=-1, env=env,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      close_fds=True, cwd=cwd,
                                      preexec_fn=os.setsid)
        self._thread = threading.Thread(target=self._communicate_thread)
        self._thread.start()
        # Single-argument parenthesised form: same text as the original print
        # statement on Python 2, and also valid on Python 3.
        print("Invoked child process  %s" % self.child.pid)
        return self

    def _communicate_thread(self):
        """Worker body: read the child's output to EOF and wait for exit."""
        self.out, self.err = self.child.communicate()

    def poll(self):
        """Return the child's return code, or None while still in progress.

        While the worker thread is alive it is the one waiting on the child
        inside communicate(), so the process is reported as unfinished
        rather than polled from a second thread at the same time.
        """
        worker = self._thread
        if worker is not None and worker.is_alive():
            return None
        return self.child.poll()

    def wait(self):
        """Block until the command has finished and return its return code.

        The worker is joined first: communicate() both drains the pipe and
        waits for the child, so after the join ``out``/``err`` are set and
        child.wait() only hands back the already recorded return code.
        """
        worker = self._thread
        if worker is not None:
            worker.join()
            self._thread = None
        return self.child.wait()
# Driver: start the command, poll until it has finished, then show the result.
# Prints use the single-argument parenthesised form, which emits the same
# text on Python 2 and is also valid on Python 3.
print("Running command ..")
cmd = Utils().run_command("ls -lrt")
while True:
    # Poll once per iteration so the value printed is the value tested.
    status = cmd.poll()
    print('poll %s' % status)
    if status is not None:
        break
    time.sleep(.1)
print('done %s' % cmd.wait())
print(cmd.out)
print(cmd.err)