我使用多进程来处理CPU密集型任务:一个线程从 stdin 读取数据并放入 input_queue,多个工作进程从 input_queue 取数据、处理后放入 output_queue,另一个线程从 output_queue 取数据并写到 stdout。但是程序有时会永远阻塞。我怀疑是我使用 multiprocessing 队列的方式不合适,但不知道如何解决,有人可以帮助我吗? 我的代码如下:
import multiprocessing
import sys
import threading
import time
from multiprocessing import Queue
def write_to_stdout(result_queue: Queue):
    """Drain *result_queue*, writing each item to stdout, until the
    StopIteration sentinel arrives."""
    # iter() with a sentinel stops once get() returns StopIteration; the
    # class compares equal only to itself, so this matches the original
    # identity check.
    for chunk in iter(result_queue.get, StopIteration):
        sys.stdout.write(chunk)
        sys.stdout.flush()
def read_from_stdin(queue):
    """Forward every line of stdin into *queue*.

    Always appends the StopIteration sentinel — even if reading fails —
    so downstream consumers can shut down.
    """
    try:
        # readline returns "" at EOF, which terminates the iterator —
        # equivalent to iterating sys.stdin directly.
        for record in iter(sys.stdin.readline, ""):
            queue.put(record)
    finally:
        # Guarantee the shutdown signal reaches the workers.
        queue.put(StopIteration)
def process_func(input_queue, result_queue):
    """Consume items from *input_queue*, run the (simulated) CPU-bound
    computation on each, and push results onto *result_queue*.

    On exit — normal or exceptional — the StopIteration sentinel is put
    back onto *input_queue* so sibling workers also receive the shutdown
    signal.
    """
    try:
        for item in iter(input_queue.get, StopIteration):
            # Stand-in for the real CPU-intensive work,
            # e.g. result = compute_something(item)
            time.sleep(0.1)
            result_queue.put(item)
    finally:
        # Re-broadcast the sentinel so every worker eventually stops.
        input_queue.put(StopIteration)
if __name__ == '__main__':
    # Bounded queues apply back-pressure: the stdin reader blocks once
    # 1000 lines are pending, so memory stays bounded.
    # Queue feeding stdin lines to the worker processes.
    input_queue = Queue(1000)
    # Queue carrying processed results to the stdout writer.
    result_queue = Queue(1000)

    # Thread that reads stdin and fills input_queue.
    input_thread = threading.Thread(target=read_from_stdin, args=(input_queue,))
    input_thread.start()

    # Thread that drains result_queue and writes to stdout.
    output_thread = threading.Thread(target=write_to_stdout, args=(result_queue,))
    output_thread.start()

    # One worker process per CPU core for the CPU-bound stage.
    processes = []
    cpu_count = multiprocessing.cpu_count()
    for _ in range(cpu_count):
        proc = multiprocessing.Process(target=process_func, args=(input_queue, result_queue))
        proc.start()
        processes.append(proc)

    # Wait until stdin is exhausted; the reader then enqueues the sentinel.
    input_thread.join()
    # Wait for every worker; each re-broadcasts the sentinel through
    # input_queue, so all workers eventually see it and exit.
    # NOTE(review): if the output thread dies early (e.g. BrokenPipeError
    # on stdout), workers can block forever on result_queue.put and these
    # joins hang — guard the writes in write_to_stdout if stdout may close.
    for proc in processes:
        proc.join()
    # All results have been enqueued by now — tell the writer to stop.
    result_queue.put(StopIteration)
    output_thread.join()
测试环境:
python3.6
ubuntu16.04 lts