I'm trying to write a module that does multiprocessing, but I've run into some strange behavior: the code works on Linux (Fedora) but not on Windows.
The problem is with the following line:
p = multiprocessing.Process(target=self.__job_function, args=(target_function, self.__input_index, curr_input, self.__q)) # doesn't work in Windows
If I use target=self.job_function instead, it works, but that isn't a good solution, because it exposes an internal function that shouldn't be called from outside.
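To make the pattern concrete, here is a stripped-down sketch of what I mean (Worker, public_target and __private_target are just placeholder names, not my actual module):

import multiprocessing

class Worker(object):
    def public_target(self, arg):       # analogous to job_function
        print(arg)

    def __private_target(self, arg):    # analogous to __job_function (name-mangled)
        print(arg)

    def run_one(self, arg):
        # Passing the name-mangled bound method as the Process target is the
        # same pattern as the problematic line in my module.
        p = multiprocessing.Process(target=self.__private_target, args=(arg,))
        p.start()
        p.join()

if __name__ == '__main__':
    Worker().run_one("hello")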
best_effort_multiprocessing_pool.py
import multiprocessing


class BestEffortPool(object):
    __processes = []
    __input_index = 0
    __q = multiprocessing.Queue()
    __MAX_GROUP_SIZE = 100

    def __join_and_terminate_processes(self, pid):
        dead_processes = [p for p in self.__processes if p.pid == pid][0]
        dead_processes.join()
        dead_processes.terminate()

    def __init__(self, num_processes):
        self.__MAX_GROUP_SIZE = num_processes

    '''
    This works in Fedora using Python 3.7.2, but not in Windows using Python 3.7.2
    '''
    def job_function(self, target_function, input_index, input_arg, q):
        result = target_function(input_arg)
        q.put([input_index, result, multiprocessing.current_process().pid])

    def __job_function(self, target_function, input_index, input_arg, q):
        result = target_function(input_arg)
        q.put([input_index, result, multiprocessing.current_process().pid])

    '''
    Runs the pool of processes
    target_function - the target function to run
    '''
    def run(self, target_function, input_set):
        if __name__ == 'best_effort_multiprocessing_pool':
            results = []
            self.__input_index = 0
            while self.__input_index < len(input_set) or len([p for p in self.__processes if p.is_alive()]) > 0 or not self.__q.empty():
                # start a new process unless the maximum pool size has been reached or if there are no more inputs
                if len(multiprocessing.active_children()) < self.__MAX_GROUP_SIZE and self.__input_index < len(input_set):
                    curr_input = input_set[self.__input_index]
                    # p = multiprocessing.Process(target=self.job_function, args=(target_function, self.__input_index, curr_input, self.__q)) # works in Windows
                    p = multiprocessing.Process(target=self.__job_function, args=(target_function, self.__input_index, curr_input, self.__q)) # doesn't work in Windows
                    self.__input_index += 1
                    self.__processes.append(p)
                    p.start()
                # empty the queue and join processes that have finished
                while not self.__q.empty():
                    input_index, result, pid = self.__q.get()
                    self.__join_and_terminate_processes(pid)
                    results.append([input_index, result])
            return results
main.py
import sys
import best_effort_multiprocessing_pool
import os, multiprocessing


def job(input_number):
    if input_number in [3, 5, 16]:  # randomly kill some processes
        os.kill(multiprocessing.current_process().pid, 9)
    return str(input_number) + " hey"


if __name__ == '__main__':
    be_pool = best_effort_multiprocessing_pool.BestEffortPool(100)
    print("Starting processes...")
    multiprocessing_results = be_pool.run(job, range(0,20))
    print("Processes finished...")
    for result in multiprocessing_results:
        print(result)
The error I get is quite long, so I won't post the whole thing, but it complains: AttributeError: 'BestEffortPool' object has no attribute '__job_function'
This is strange, because it works on Fedora, and also because it doesn't complain about the other private attributes such as self.__q and self.__input_index.
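I haven't dug much further, but since Windows uses the "spawn" start method (so the Process object, including its target, has to be pickled and sent to the child), I'm considering a quick check along these lines to see whether it's the name-mangled bound method itself that fails to serialize (this is just a sketch, not part of the module):

import pickle
import best_effort_multiprocessing_pool

pool = best_effort_multiprocessing_pool.BestEffortPool(100)

# Try to pickle the public and the name-mangled bound method in isolation,
# which is roughly what spawn-based multiprocessing has to do with the target.
for name in ('job_function', '_BestEffortPool__job_function'):
    method = getattr(pool, name)
    try:
        pickle.loads(pickle.dumps(method))
        print(name, 'pickles fine')
    except Exception as e:
        print(name, 'fails:', e)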
Expected output (not necessarily in this order):
$ Starting processes...
Processes finished...
[2, '2 hey']
[1, '1 hey']
[7, '7 hey']
[6, '6 hey']
[4, '4 hey']
[13, '13 hey']
[10, '10 hey']
[8, '8 hey']
[17, '17 hey']
[11, '11 hey']
[14, '14 hey']
[0, '0 hey']
[12, '12 hey']
[15, '15 hey']
[9, '9 hey']
[18, '18 hey']
[19, '19 hey']