I have a memoizing wrapper function with hit and miss counters. Because I can't access non-local variables from inside the wrapped function, I use a dictionary to count hits and misses.
The function runs in roughly 1000 parallel processes on 48 cores, more than a million times per core, so I use a Manager.dict to manage the score.
Just keeping the score triples my execution time, so I'd like to do something smarter: keep a local counter that is a plain dict and, when the process exits, add that local score to the shared score dict managed by the manager.
Is there a way to execute a function when a subprocess exits? Something like atexit that works for spawned children.
Relevant code (note MAGICAL_AT_PROCESS_EXIT_CLASS, which is the thing I wish I had):
from functools import wraps
from multiprocessing import Manager
import atexit
import pickle

manager = Manager()
global_score = manager.dict({
    "hits": 0,
    "misses": 0
})

def memoize(func):
    local_score = {
        "hits": 0,
        "misses": 0
    }
    cache = {}

    def process_exit_handler():
        global_score["hits"] += local_score["hits"]
        global_score["misses"] += local_score["misses"]

    MAGICAL_AT_PROCESS_EXIT_CLASS.register(process_exit_handler)

    @wraps(func)
    def wrap(*args):
        cache_key = pickle.dumps(args)
        if cache_key not in cache:
            local_score["misses"] += 1
            cache[cache_key] = func(*args)
        else:
            local_score["hits"] += 1
        return cache[cache_key]
    return wrap

def exit_handler():
    print("Cache", global_score)

atexit.register(exit_handler)
(Yes, I know each process caches independently. Yes, that is the desired behavior.)
Current workaround: This only applies to my specific use case of the function. I run the function once in each process, and each run calls itself around a million times. I changed the wrapper method as follows:
@wraps(func)
def wrap(*args):
    cache_key = pickle.dumps(args)
    if cache_key not in cache:
        local_score["misses"] += 1
        local_score["open"] += 1  # local_score is now initialized with an additional "open": 0 entry
        cache[cache_key] = func(*args)
        local_score["open"] -= 1
    else:
        local_score["hits"] += 1
    if local_score["open"] == 0:
        # flush the local counters into the managed dict only once the
        # outermost call has returned
        global_score["hits"] += local_score["hits"]
        global_score["misses"] += local_score["misses"]
        local_score["hits"] = 0
        local_score["misses"] = 0
    return cache[cache_key]
Instead of synchronizing writes hundreds of millions of times, this only needs to synchronize as many times as there are processes (1000).
Answer (score: 1)
It's relatively easy to pull this off by subclassing Process, enhancing it with memoization and then building your own pool out of it, but since you want to use multiprocessing.Pool, it gets more complicated. Pool doesn't enable this by choice, and we have to meddle with its guts to make it possible. Make sure no child processes are watching while you read on.
There are two problems to solve: (1) getting the child processes to call an exit handler when the process is about to exit, and (2) preventing Pool from terminating the children before that exit handler has finished.
For use with forking as the start method for child processes, I found it necessary to monkey patch multiprocessing.pool.worker. We could use atexit with the start method 'spawn' (the default on Windows), but that would buy us little and deprive us of the benefits of forking, so the following code doesn't use atexit. The patch is a wrapper around worker that calls our custom at_exit function when the worker returns, which happens when the process is about to exit.
# at_exit_pool.py
import os
import threading
from functools import wraps
import multiprocessing.pool
from multiprocessing.pool import worker, TERMINATE, Pool
from multiprocessing import util, Barrier
from functools import partial


def finalized(worker):
    """Extend worker function with at_exit call."""
    @wraps(worker)
    def wrapper(*args, **kwargs):
        result = worker(*args, **kwargs)
        at_exit()  # <-- patch
        return result
    return wrapper


worker = finalized(worker)
multiprocessing.pool.worker = worker  # patch
This solution also subclasses Pool to deal with both problems. PatientPool introduces two mandatory arguments, at_exit and at_exit_args. at_exit takes the exit handler, and PatientPool piggybacks on the standard initializer of Pool to register the exit handler in the child processes. These are the functions dealing with exit-handler registration:
# at_exit_pool.py
def at_exit(func=None, barrier=None, *args):
    """Call at_exit function and wait on barrier."""
    func(*args)
    print(os.getpid(), 'barrier waiting')  # DEBUG
    barrier.wait()


def register_at_exit(func, barrier, *args):
    """Register at_exit function."""
    global at_exit
    at_exit = partial(at_exit, func, barrier, *args)


def combi_initializer(at_exit_args, initializer, initargs):
    """Piggyback initializer with register_at_exit."""
    if initializer:
        initializer(*initargs)
    register_at_exit(*at_exit_args)
As you can see in at_exit, we're going to use a multiprocessing.Barrier. Using this synchronization primitive is the solution to our second problem: it prevents Pool from terminating the child processes before the exit handlers have done their job.
A barrier works by blocking every process that calls .wait() on it until all of its 'parties' processes have called .wait().
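For illustration, here is a minimal standalone sketch (not part of the answer's code) of that behavior: three parties, two children plus the parent, all block in .wait() and are released together once the last one arrives.

import os
from multiprocessing import Barrier, Process

def child(barrier):
    print(os.getpid(), 'waiting at barrier')
    barrier.wait()   # blocks until all three parties have called .wait()
    print(os.getpid(), 'released')

if __name__ == '__main__':
    barrier = Barrier(3)  # two child processes + the parent
    children = [Process(target=child, args=(barrier,)) for _ in range(2)]
    for p in children:
        p.start()
    barrier.wait()        # the parent is the third party
    for p in children:
        p.join()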
PatientPool initializes such a barrier and passes it to its child processes. The 'parties' parameter of this barrier is set to the number of child processes + 1. The child processes call .wait() on the barrier as soon as they are done with at_exit, and PatientPool itself also calls .wait() on it. To do that we override the _terminate_pool method of Pool. Doing so prevents the pool from terminating the child processes prematurely, since every process calling .wait() is released only once all child processes have reached the barrier.
# at_exit_pool.py
class PatientPool(Pool):
    """Pool class which awaits completion of exit handlers in child processes
    before terminating the processes."""

    def __init__(self, at_exit, at_exit_args=(), processes=None,
                 initializer=None, initargs=(), maxtasksperchild=None,
                 context=None):
        # changed--------------------------------------------------------------
        self._barrier = self._get_barrier(processes)
        at_exit_args = (at_exit, self._barrier) + at_exit_args
        initargs = (at_exit_args, initializer, initargs)
        super().__init__(
            processes, initializer=combi_initializer, initargs=initargs,
            maxtasksperchild=maxtasksperchild, context=context
        )
        # ---------------------------------------------------------------------

    @staticmethod
    def _get_barrier(processes):
        """Get Barrier object for use in _terminate_pool and
        child processes."""
        if processes is None:  # this will be repeated in super().__init__(...)
            processes = os.cpu_count() or 1
        if processes < 1:
            raise ValueError("Number of processes must be at least 1")
        return Barrier(processes + 1)

    def _terminate_pool(self, taskqueue, inqueue, outqueue, pool,
                        worker_handler, task_handler, result_handler, cache):
        """changed from classmethod to normal method"""
        # this is guaranteed to only be called once
        util.debug('finalizing pool')

        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE

        util.debug('helping task handler/workers to finish')
        self.__class__._help_stuff_finish(inqueue, task_handler, len(pool))  # changed

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)  # sentinel

        # We must wait for the worker handler to exit before terminating
        # workers because we don't want workers to be restarted behind our back.
        util.debug('joining worker handler')
        if threading.current_thread() is not worker_handler:
            worker_handler.join()

        # patch ---------------------------------------------------------------
        print('_terminate_pool barrier waiting')  # DEBUG
        self._barrier.wait()  # <- blocks until all processes have called wait()
        print('_terminate_pool barrier crossed')  # DEBUG
        # ---------------------------------------------------------------------

        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            util.debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()

        util.debug('joining task handler')
        if threading.current_thread() is not task_handler:
            task_handler.join()

        util.debug('joining result handler')
        if threading.current_thread() is not result_handler:
            result_handler.join()

        if pool and hasattr(pool[0], 'terminate'):
            util.debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    util.debug('cleaning up worker %d' % p.pid)
                    p.join()
Now in your main module you only have to switch Pool for PatientPool and pass the required at_exit arguments. For simplicity my exit handler appends local_score to a toml file. Note that local_score needs to be a global variable so the exit handler can access it.
import os
from functools import wraps
# from multiprocessing import log_to_stderr, set_start_method
# import logging
import toml

from at_exit_pool import register_at_exit, PatientPool


local_score = {
    "hits": 0,
    "misses": 0
}


def memoize(func):
    cache = {}

    @wraps(func)
    def wrap(*args):
        cache_key = str(args)  # ~14% faster than pickle.dumps(args)
        if cache_key not in cache:
            local_score["misses"] += 1
            cache[cache_key] = func(*args)
        else:
            local_score["hits"] += 1
        return cache[cache_key]
    return wrap


@memoize
def foo(x):
    for _ in range(int(x)):
        x - 1
    return x


def dump_score(pathfile):
    with open(pathfile, 'a') as fh:
        toml.dump({str(os.getpid()): local_score}, fh)


if __name__ == '__main__':

    # set_start_method('spawn')
    # logger = log_to_stderr()
    # logger.setLevel(logging.DEBUG)

    PATHFILE = 'score.toml'
    N_WORKERS = 4

    arguments = [10e6 + i for i in range(10)] * 5
    # print(arguments[:10])

    with PatientPool(at_exit=dump_score, at_exit_args=(PATHFILE,),
                     processes=N_WORKERS) as pool:

        results = pool.map(foo, arguments, chunksize=3)
        # print(results[:10])
Running this example produces terminal output like the following, where "_terminate_pool barrier crossed" always executes last, while the flow before that line can vary:
555 barrier waiting
_terminate_pool barrier waiting
554 barrier waiting
556 barrier waiting
557 barrier waiting
_terminate_pool barrier crossed
Process finished with exit code 0
The toml file with the scores for this run looks like this:
[555]
hits = 3
misses = 8
[554]
hits = 3
misses = 9
[556]
hits = 2
misses = 10
[557]
hits = 5
misses = 10
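If you also need the overall totals the question asked for, the per-process entries in score.toml can be summed afterwards. A minimal sketch, assuming the file layout above and the same toml package (this helper is not part of the answer's code):

# hypothetical helper, not part of the answer's code
import toml

scores = toml.load('score.toml')  # e.g. {'555': {'hits': 3, 'misses': 8}, ...}
total = {
    "hits": sum(s["hits"] for s in scores.values()),
    "misses": sum(s["misses"] for s in scores.values()),
}
print("Cache", total)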