我正在尝试将bash管道导出到python中并使用多个进程来加速我的管道。所以我有几个子进程产生了mulitprocessing.Pipe连接它们(下面的示例代码)。
但是,我总是在流程结束时到处停止,输出文件丢失了最后一位。看起来像缓冲区死锁或刷新,但无法弄明白。我相信我错过了一些明显的东西,请告诉我:)任何帮助或评论将不胜感激。谢谢
#!/usr/bin/env python
from __future__ import print_function
import os
import sys
import multiprocessing as MP
import subprocess as SP
def usage():
Usage = "Need /path/to/infile"
log2err(Usage)
sys.exit(1)
def log2err(*s):
print(s, file=sys.stderr)
def pipeWrite(proc, outpipe):
# ouput only
global killToken
while 1:
s = proc.stdout.read(bufferSize)
if len(s) == 0:
break
#log2err('Write %s: %s' %(os.getpid(), len(s)))
outpipe.send_bytes(s)
log2err("Write PID %s: sending kill" % os.getpid())
outpipe.send_bytes(killToken)
outpipe.close()
def pipeRead(proc, inpipe):
# input only
global killToken
while 1:
s = inpipe.recv_bytes(bufferSize)
#log2err('Read %s: %s' %(os.getpid(), len(s)))
if s == killToken:
log2err("Read PID %s: received kill" % os.getpid())
break
proc.stdin.write(s)
proc.stdin.flush()
# final cleanup
proc.stdin.flush()
proc.stdin.close()
inpipe.close()
def testRead(infile, outpipe):
stmt = "cat %s" % infile
stmt += " | tee >( wc -l 1>&2)"
proc = SP.Popen(stmt, shell=True, stdout=SP.PIPE, executable='/bin/bash', bufsize=bufferSize)
pipeWrite(proc, outpipe)
proc.stdout.close()
outpipe.close()
log2err('testRead is DONE')
def testGzip(inpipe, outpipe):
stmt = "gzip -c - | tee >(wc -l 1>&2)"
proc = SP.Popen(stmt, shell=True, stdout=SP.PIPE, stdin=SP.PIPE, executable='/bin/bash', bufsize=bufferSize)
PR = MP.Process(target=pipeRead, args=(proc, inpipe))
PW = MP.Process(target=pipeWrite, args=(proc, outpipe))
PW.start()
PR.start()
PR.join()
proc.stdin.flush()
proc.stdin.close()
proc.stdout.flush()
proc.stdout.close()
log2err("testGzip PID:%s with Read:%s and Write:%s" %(os.getpid(), PR.pid, PW.pid))
PW.join()
log2err('testGzip is DONE')
def testOutput(infile, inpipe):
stmt = "tee %s.gz | sha512sum - > %s.gz.sha512" % (infile, infile)
proc = SP.Popen(stmt, shell=True, stdin=SP.PIPE, executable='/bin/bash', bufsize=bufferSize)
pipeRead(proc, inpipe)
inpipe.close()
log2err('outputFinal is DONE')
if __name__ == "__main__" :
try:
infile = sys.argv[1]
if infile in ('-h', '--help'):
usage()
except IndexError:
usage()
bufferSize = 256*256
killToken = "I am done with all of these processes"
# the curl task stream and the curl checksum and output stream
ingestRecv, ingestSend = MP.Pipe(False)
ingestProc = MP.Process(target=testRead, args=(infile, ingestSend))
# encrypt stream
workRecv, workSend = MP.Pipe(False)
workProc = MP.Process(target=testGzip, args=(ingestRecv, workSend))
# output to file and sha checksum stream
outputProc = MP.Process(target=testOutput, args=(infile, workRecv))
ingestProc.start()
log2err("ingestProc PID:%s" % ingestProc.pid)
workProc.start()
log2err("workProc PID:%s" % workProc.pid)
outputProc.start()
log2err("outputProc PID:%s" % outputProc.pid)
ingestProc.join()
log2err("ingestProc: joined")
workProc.join()
log2err("workProc: joined")
outputProc.join()
log2err("outputProc: joined")