我有一个记忆化(memoize)函数包装器,带有命中(hit)和未命中(miss)计数器。由于无法在内部函数中对非局部变量重新赋值,我使用字典来统计命中和未命中的次数。



是否可以在子进程退出时执行某个函数?也就是类似 atexit、但对派生出的子进程同样有效的机制。


# Shared hit/miss totals. ``manager.dict`` returns a proxy, so updates made in
# one process are visible to the others.
# NOTE(review): ``Manager`` is presumably ``multiprocessing.Manager`` -- the
# import is not part of this listing; confirm.
manager = Manager()

global_score = manager.dict({
    "hits": 0,
    "misses": 0,
})

def memoize(func):
    """Return a caching wrapper around ``func`` that counts hits and misses.

    Results are cached per wrapped function, keyed by the pickled positional
    arguments. Every call that has to invoke ``func`` counts as a miss; every
    call answered from the cache counts as a hit. Only positional arguments
    are supported.

    NOTE(review): ``pickle`` must be imported by the enclosing module; the
    import is not part of this listing.
    """
    # Per-process counters; a dict is used so the nested functions can mutate
    # the values without rebinding a name from the enclosing scope.
    local_score = {
        "hits": 0,
        "misses": 0,
    }

    cache = {}

    def process_exit_handler():
        """Add this process's counters onto the shared ``global_score``."""
        # NOTE(review): nothing in this listing registers or calls this
        # handler; it still has to be hooked into process shutdown.
        global_score["hits"] += local_score["hits"]
        global_score["misses"] += local_score["misses"]

    def wrap(*args):
        cache_key = pickle.dumps(args)
        if cache_key not in cache:
            local_score["misses"] += 1
            cache[cache_key] = func(*args)
        else:
            # Only a lookup answered from the cache is a hit.
            local_score["hits"] += 1
        return cache[cache_key]

    return wrap

def exit_handler():
    """Print the cache statistics currently held in ``global_score``."""
    label = "Cache"
    print(label, global_score)



当前解决方案:这只适用于我对该函数的特定用例。我在每个进程中只运行一次该函数,而每次运行时它会递归调用自身大约一百万次。我按如下方式修改了包装器方法:

def wrap(*args):
    """Memoized call of ``func`` that flushes its counters when idle.

    ``cache``, ``local_score``, ``score`` and ``func`` come from the enclosing
    scope. ``local_score["open"]`` counts the uncached calls that are still
    executing; whenever it is back at zero (the outermost call has finished,
    or the call was answered from the cache with nothing in flight) the local
    hit/miss counters are added to ``score`` and reset.

    NOTE(review): ``score`` is presumably the shared totals mapping called
    ``global_score`` in the earlier listing -- confirm.
    """
    cache_key = pickle.dumps(args)
    if cache_key not in cache:
        local_score["misses"] += 1
        local_score["open"] += 1
        cache[cache_key] = func(*args)
        local_score["open"] -= 1
    else:
        # Only a lookup answered from the cache is a hit.
        local_score["hits"] += 1

    if local_score["open"] == 0:
        # No computation in flight: publish the local counters and start over.
        score["hits"] += local_score["hits"]
        score["misses"] += local_score["misses"]
        local_score["hits"] = 0
        local_score["misses"] = 0

    return cache[cache_key]


如果通过子类化 Process、为其加上记忆化功能、再用它构建自己的进程池,实现这一点会相对容易;但由于你想使用 multiprocessing.Pool,事情就变得复杂了。Pool 在设计上并不支持这一点,我们必须深入修改它的内部实现才能做到。继续往下读时,请确保没有子进程在“围观”。


  1. 让子进程在进程终止时调用退出处理程序。
  2. 防止 Pool 在退出处理程序执行完毕之前终止子进程。

为了在以 fork 作为子进程启动方法时也能工作,我发现有必要对 multiprocessing.pool.worker 打猴子补丁(monkey patch)。在启动方法为 'spawn'(Windows 上的默认值)时我们本可以使用 atexit,但这带来的好处很少,还会让我们失去 fork 的优势,因此下面的代码没有使用 atexit。该补丁是对 worker 的一层包装:当 worker 返回时(也就是进程即将退出时),调用我们自定义的 at_exit 函数。

# at_exit_pool.py

import os
import threading
from functools import wraps
import multiprocessing.pool
from multiprocessing.pool import worker, TERMINATE, Pool
from multiprocessing import util, Barrier
from functools import partial

def finalized(worker):
    """Return a wrapper that runs ``worker`` and then calls ``at_exit``.

    ``at_exit`` is looked up as a module global each time the wrapper runs,
    so a later rebinding of that name is what actually gets called. The
    wrapper hands back whatever ``worker`` returned.
    """
    def wrapper(*args, **kwargs):
        outcome = worker(*args, **kwargs)
        # Reached only when the worker function returned normally.
        at_exit()  # <-- patch
        return outcome

    return wrapper

# Replace the stock pool worker with the finalized version, both under the
# local name and inside ``multiprocessing.pool`` itself.
worker = multiprocessing.pool.worker = finalized(worker)  # patch

此解决方案还通过子类化 Pool 来解决上述两个问题。PatientPool 引入了两个参数 at_exit 和 at_exit_args(其中 at_exit 为必填,at_exit_args 默认为空元组)。at_exit 接收退出处理程序,而 PatientPool 借用标准 Pool 的 initializer,在子进程中注册该退出处理程序。以下是负责注册退出处理程序的函数:

# at_exit_pool.py

def at_exit(func=None, barrier=None, *args):
    """Call at_exit function and wait on barrier.

    Args:
        func: Exit handler to run, or ``None`` when none has been registered.
        barrier: Object with a ``wait()`` method, or ``None``. The pool waits
            on the same barrier before it terminates its workers.
        *args: Positional arguments passed on to ``func``.

    The barrier is waited on even if ``func`` raises, because the other
    parties would otherwise block on it indefinitely. The exception from
    ``func`` still propagates afterwards.
    """
    try:
        if func is not None:
            func(*args)
    finally:
        print(os.getpid(), 'barrier waiting')  # DEBUG
        if barrier is not None:
            barrier.wait()

def register_at_exit(func, barrier, *args):
    """Register at_exit function.

    Rebinds the module-level ``at_exit`` to a partial of its current value
    with ``func``, ``barrier`` and ``args`` already filled in, so it can
    later be called without arguments.
    """
    global at_exit
    bound = (func, barrier) + args
    at_exit = partial(at_exit, *bound)

def combi_initializer(at_exit_args, initializer, initargs):
    """Piggyback initializer with register_at_exit.

    Args:
        at_exit_args: Positional arguments passed on to ``register_at_exit``.
        initializer: The caller's own initializer, or a falsy value for none.
        initargs: Positional arguments for ``initializer``.
    """
    if initializer:
        initializer(*initargs)
    register_at_exit(*at_exit_args)




# at_exit_pool.py

class PatientPool(Pool):
    """Pool class which awaits completion of exit handlers in child processes
    before terminating the processes.

    Workers are started with ``combi_initializer``, which receives the exit
    handler together with a barrier created here; ``_terminate_pool`` waits
    on that same barrier before it terminates any worker.
    """

    def __init__(self, at_exit, at_exit_args=(), processes=None,
                 initializer=None, initargs=(), maxtasksperchild=None,
                 context=None):
        """Create the pool and arrange for ``at_exit`` to run in each worker.

        Args:
            at_exit: Exit handler for the child processes.
            at_exit_args: Positional arguments for ``at_exit``.
            processes, initializer, initargs, maxtasksperchild, context:
                Same as for ``Pool``; ``initializer``/``initargs`` are
                invoked through ``combi_initializer``.
        """
        # changed--------------------------------------------------------------
        self._barrier = self._get_barrier(processes)

        # tuple() lets callers pass any iterable of arguments, not only a tuple.
        at_exit_args = (at_exit, self._barrier) + tuple(at_exit_args)
        initargs = (at_exit_args, initializer, initargs)

        super().__init__(
            processes, initializer=combi_initializer, initargs=initargs,
            maxtasksperchild=maxtasksperchild, context=context
        )
        # ---------------------------------------------------------------------

    @staticmethod
    def _get_barrier(processes):
        """Get Barrier object for use in _terminate_pool and
        child processes.

        The barrier has ``processes + 1`` parties: one per worker plus one
        for the ``wait()`` call in ``_terminate_pool``.

        Raises:
            ValueError: If ``processes`` is less than 1.
        """
        if processes is None:  # this will be repeated in super().__init__(...)
            processes = os.cpu_count() or 1
        if processes < 1:
            raise ValueError("Number of processes must be at least 1")

        return Barrier(processes + 1)

    def _terminate_pool(self, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        """changed from classmethod to normal method

        Shuts the pool down like the base implementation, but waits on the
        barrier before terminating workers.

        NOTE(review): the parameter list has to match what the installed
        ``multiprocessing.pool.Pool`` passes to ``_terminate_pool``; that
        differs between Python versions -- confirm against the target one.
        """
        # this is guaranteed to only be called once
        util.debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE

        util.debug('helping task handler/workers to finish')
        self.__class__._help_stuff_finish(inqueue, task_handler, len(pool))  # changed

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)  # sentinel

        # We must wait for the worker handler to exit before terminating
        # workers because we don't want workers to be restarted behind our back.
        util.debug('joining worker handler')
        if threading.current_thread() is not worker_handler:
            worker_handler.join()

        # patch ---------------------------------------------------------------
        print('_terminate_pool barrier waiting')  # DEBUG
        self._barrier.wait()  # <- blocks until all processes have called wait()
        print('_terminate_pool barrier crossed')  # DEBUG
        # ---------------------------------------------------------------------

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            util.debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        util.debug('joining task handler')
        if threading.current_thread() is not task_handler:
            task_handler.join()

        util.debug('joining result handler')
        if threading.current_thread() is not result_handler:
            result_handler.join()

        if pool and hasattr(pool[0], 'terminate'):
            util.debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    util.debug('cleaning up worker %d' % p.pid)
                    p.join()


import os
from functools import wraps
# from multiprocessing import log_to_stderr, set_start_method
# import logging
import toml
from at_exit_pool import register_at_exit, PatientPool

# Per-process hit/miss counters, updated by the memoize wrapper and written
# out by dump_score.
local_score = {
    "hits": 0,
    "misses": 0,
}

def memoize(func):
    """Return a caching wrapper around ``func`` that counts hits and misses.

    Results are cached per wrapped function, keyed by ``str(args)``. A call
    that has to invoke ``func`` bumps ``local_score["misses"]``; a call
    answered from the cache bumps ``local_score["hits"]``. Only positional
    arguments are supported.
    """
    cache = {}

    @wraps(func)  # keep the wrapped function's name and metadata
    def wrap(*args):
        cache_key = str(args)  # ~14% faster than pickle.dumps(args)
        if cache_key not in cache:
            local_score["misses"] += 1
            cache[cache_key] = func(*args)
        else:
            # Only a lookup answered from the cache is a hit.
            local_score["hits"] += 1
        return cache[cache_key]

    return wrap

def foo(x):
    """Loop ``int(x)`` times doing throwaway arithmetic, then return ``x``.

    NOTE(review): the sample output reports cache hits, so this function was
    presumably meant to be decorated with ``memoize`` -- confirm.
    """
    iterations = int(x)
    for _step in range(iterations):
        x - 1  # result deliberately discarded; the loop only burns CPU time
    return x

def dump_score(pathfile):
    """Append this process's ``local_score`` to ``pathfile`` as TOML.

    The counters are written as a table keyed by the current process id.
    """
    record = {str(os.getpid()): local_score}
    with open(pathfile, 'a') as fh:
        toml.dump(record, fh)

if __name__ == '__main__':

    # Optional switches, left disabled:
    # set_start_method('spawn')
    # logger = log_to_stderr()
    # logger.setLevel(logging.DEBUG)

    PATHFILE = 'score.toml'
    N_WORKERS = 4

    # Ten distinct inputs repeated five times, so repeated values occur.
    arguments = 5 * [10e6 + offset for offset in range(10)]
    # print(arguments[:10])

    # Each worker calls dump_score(PATHFILE) as its exit handler.
    with PatientPool(
        at_exit=dump_score,
        at_exit_args=(PATHFILE,),
        processes=N_WORKERS,
    ) as pool:

        results = pool.map(foo, arguments, chunksize=3)
        # print(results[:10])

运行此示例会产生如下终端输出,其中 "_terminate_pool barrier crossed" 总是最后打印,而这一行之前各行的顺序可能有所不同:

555 barrier waiting
_terminate_pool barrier waiting
554 barrier waiting
556 barrier waiting
557 barrier waiting
_terminate_pool barrier crossed

Process finished with exit code 0


hits = 3
misses = 8
hits = 3
misses = 9
hits = 2
misses = 10
hits = 5
misses = 10