我喜欢默认的python multiprocessing.Pool,但是要显示池执行过程中的当前进度并不容易,这仍然很麻烦。出于这种考虑,我试图创建自己的自定义多进程池映射器,看起来像这样;
from multiprocessing import Process, Pool, cpu_count
from iterable_queue import IterableQueue
def _proc_action(f, in_queue, out_queue):
try:
for val in in_queue:
out_queue.put(f(val))
except (KeyboardInterrupt, EOFError):
pass
def progress_pool_map(f, ls, n_procs=cpu_count()):
in_queue = IterableQueue()
out_queue = IterableQueue()
err = None
try:
procs = [Process(target=_proc_action, args=(f, in_queue, out_queue)) for _ in range(n_procs)]
[p.start() for p in procs]
for elem in ls:
in_queue.put(elem)
in_queue.close()
bar = 0
for _ in ls:
elem = next(out_queue)
bar += 1
if bar % 1000 == 0:
print(bar)
yield elem
out_queue.close()
except (KeyboardInterrupt, EOFError) as e:
in_queue.close()
out_queue.close()
print("Joining processes")
[p.join() for p in procs]
print("Closing processes")
[p.close() for p in procs]
err = e
if err:
raise err
它工作得很好,并且每处理1000个项目就会向控制台打印一个值。进度显示本身是我将来可能会担心的事情。但是现在,我的问题是,取消操作后,该操作将执行任何操作,但会正常失败。当我尝试中断地图时,它会挂在Joining Processes
上,并且永远不会到达Closing Processes
。如果我再次尝试按Ctrl + C,它将导致BrokenPipeError
s的无限涌入,直到控制台发送直到我发送EOF并停止程序为止。
这里是iterable_queue.py
,以供参考;
from multiprocessing.queues import Queue
from multiprocessing import get_context, Value
import queue
class QueueClosed(Exception):
pass
class IterableQueue(Queue):
def __init__(self, maxsize=0, *, ctx=None):
super().__init__(
maxsize=maxsize,
ctx=ctx if ctx is not None else get_context()
)
self.closed = Value('b', False)
def close(self):
with self.closed.get_lock():
if not self.closed.value:
self.closed.value = True
super().put((None, False))
# throws BrokenPipeError in another thread without this sleep in between
# terrible hack, must fix at some point
import time; time.sleep(0.01)
super().close()
def __iter__(self):
return self
def __next__(self):
try:
return self.get()
except QueueClosed:
raise StopIteration
def get(self, *args, **kwargs):
try:
result, is_open = super().get(*args, **kwargs)
except OSError:
raise QueueClosed
if not is_open:
super().put((None, False))
raise QueueClosed
return result
def __bool__(self):
return bool(self.closed.value)
def put(self, val, *args, **kwargs):
with self.closed.get_lock():
if self.closed.value:
raise QueueClosed
super().put((val, True), *args, **kwargs)
def get_nowait(self):
return self.get(block=False)
def put_nowait(self):
return self.put(block=False)
def empty_remaining(self, block=False):
try:
while True:
yield self.get(block=block)
except (queue.Empty, QueueClosed):
pass
def clear(self):
for _ in self.empty_remaining():
pass
def __enter__(self):
return self
def __exit__(self, *args):
self.close()