好消息:使用python3 test.py
运行此操作,然后按ctrl-c。它应该停止
坏消息:使用mpirun -n 1 python3 test.py
按ctrl-c运行此操作。 Ops,mpirun被终止,但multiprocessing.pool产生的所有python进程永远存在。如何解决这个问题?
test.py:
from mpi4py import MPI
import multiprocessing as mp
import signal
import time
class GracefulKiller:
kill_now = False
def __init__(self):
signal.signal(signal.SIGINT, self.exit_gracefully)
signal.signal(signal.SIGTERM, self.exit_gracefully)
def exit_gracefully(self, signum, frame):
self.kill_now = True
print("I kill")
def worker(e):
killer = GracefulKiller()
while(True):
if killer.kill_now:
e.set()
if e.is_set():
return
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
killer = GracefulKiller()
with mp.Manager() as manager:
e = manager.Event()
pool = mp.Pool()
arg = []
for i in range(100):
arg += [e]
r = pool.map_async(worker, arg)
r.get()
pool.join()
pool.close()
if killer.kill_now:
e.set()
if e.is_set():
comm.Abort()
main()
GracefulKiller
来自How to process SIGTERM signal gracefully?
mpirun
来自openmpi。我在Ubuntu和CentOS上进行了测试。
更新
print("I kill")
。然后我再次使用ctrl-C
尝试mpirun
。它打印I kill
一次,但仍然有一堆python3
进程。UPDATE2:
pool.terminate()
以尝试在主进程捕获ctrl-c时杀死主进程pool
生成的所有进程test.py:
from mpi4py import MPI
import multiprocessing as mp
import signal
import time
class GracefulKiller:
kill_now = False
def __init__(self, name, pool=None):
signal.signal(signal.SIGINT, self.exit_gracefully)
signal.signal(signal.SIGTERM, self.exit_gracefully)
self.name = name
self.pool = pool
def exit_gracefully(self, signum, frame):
self.kill_now = True
print("I kill.", self.name)
if self.pool is not None:
self.pool.close()
self.pool.terminate()
def worker(e):
killer = GracefulKiller('worker')
while(True):
if killer.kill_now:
e.set()
if e.is_set():
return
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
with mp.Manager() as manager:
e = manager.Event()
pool = mp.Pool()
killer = GracefulKiller('main', pool)
arg = []
for i in range(100):
arg += [e]
r = pool.map_async(worker, arg)
r.get()
pool.join()
pool.close()
if killer.kill_now:
e.set()
if e.is_set():
comm.Abort()
main()
一堆python3进程继续存在。
I kill. worker/main ...
...
File "test.py", line 20, in exit_gracefully
self.pool.terminate()
File "/usr/lib/python3.5/multiprocessing/pool.py", line 505, in terminate
self._terminate()
File "/usr/lib/python3.5/multiprocessing/util.py", line 186, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/lib/python3.5/multiprocessing/pool.py", line 535, in _terminate_pool
...
I kill. worker/main ...
mpirun -n 1 python3 test.py(然后是ctrl-c):
prints nothing
和一堆python3进程存在于
可能会有所帮助的事情: