# Run the command through the shell. os.setsid puts the child in its own
# session/process group so the whole group can be signalled below, and
# stderr is merged into the stdout pipe.
child = subprocess.Popen(
    command,
    shell=True,
    env=environment,
    close_fds=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    stdin=sys.stdin,
    preexec_fn=os.setsid,
)

# Poll every 0.1 s until the child exits or an interrupt has been flagged.
# NOTE: nothing reads child.stdout while this loop runs; output is only
# collected by communicate() afterwards, so a child that fills the OS pipe
# buffer will block on write and poll() will keep returning None.
child_interrupted = False
while child.poll() is None:
    if not Signal.isInterrupted():
        time.sleep(0.1)
        continue
    # Interrupted: terminate the child's entire process group and stop polling.
    child_interrupted = True
    os.killpg(os.getpgid(child.pid), signal.SIGTERM)
    break

# Collect the combined stdout/stderr output (this also reaps the child).
subout = child.communicate()[0]
logging.info(subout)
上面的代码对大多数命令(约 90%)都能正常工作,但对某些命令会挂起。
对于那些总是挂起的命令,只要去掉下面这段代码,就能正常运行:
# Poll every 0.1 s until the child exits or an interrupt has been flagged.
child_interrupted = False
while child.poll() is None:
    if not Signal.isInterrupted():
        time.sleep(0.1)
        continue
    # Interrupted: terminate the child's entire process group and stop polling.
    child_interrupted = True
    os.killpg(os.getpgid(child.pid), signal.SIGTERM)
    break
我猜对于那些挂起的命令,即使作业已经完成,child.poll() 仍然返回 None?
难道 communicate() 能判断进程是否已经结束,而 poll() 却不能?
我对这些进程执行了 ps -ef,
发现只有在保留 child.poll() 这段代码时,它们才会处于 defunct(僵尸)状态。
知道这是为什么吗?
看起来 defunct 的意思是“这是一个僵尸进程:它已经结束,但父进程还没有对它调用 wait()。”可是,我进行轮询正是为了判断是否可以调用 wait/communicate……
答案 0 :(得分:1)
您已将 Popen 对象设置为通过管道接收子进程的 stdout。问题是,在进程退出之前,您不会从该管道读取数据。如果进程产生的输出足以填满操作系统级别的管道缓冲区,而您又没有排空管道,就会发生死锁:子进程在等您读取它已写入的输出,这样它才能继续写入并最终退出;而您却在等它退出之后才去读取输出。
如果你的显式轮询和中断检查是必要的,那么解决这个死锁的最简单方法就是启动一个排空管道的线程:
... launch the thread just after Popen called ...
# Collects every line read from the child's stdout pipe.
draineddata = []
# list.extend() iterates child.stdout line by line until EOF, so running it
# in a thread keeps the pipe drained while the main thread keeps polling.
drainerthread = threading.Thread(
    target=draineddata.extend,
    args=(child.stdout,),
)
# Daemon thread: it will not keep the interpreter alive at exit.
drainerthread.daemon = True
drainerthread.start()
... then where you had been doing communicate, change it to: ...
# Block until the child process has exited.
child.wait()
# Wait for drainerthread to finish so draineddata is complete before it is
# combined (presumably the thread reading child.stdout -- confirm at its
# creation site).
drainerthread.join()
subout = b''.join(draineddata)  # Combine the collected chunks into a single bytes output