我在 Python 中使用 multiprocessing 模块,为 2000 年到 2012 年的每一年各生成一个新进程。这段代码直到上周都运行正常。现在,代码仍然可以运行,也不会抛出任何错误,看起来也创建了新进程,但这些进程并没有同时开始执行。运行它的机器使用 Ubuntu,内存充足,并且有 24 个处理器。
这些进程似乎是按顺序运行,而不是并行运行。过去 3 个月里代码没有任何改动,所以我怀疑这是环境问题,但完全不知道该从哪里开始调试。有什么建议吗?
是否有某些内核默认设置会阻止代码并行执行?或者是 Python 的某些设置?
代码:
# Worker process: takes items off a shared work queue, unpacks each item as
# (start_date, end_date), and puts a timestamped "End writing" message on the
# result queue.
# NOTE(review): the indentation of this block was lost in the paste, so the
# nesting of the statements in run() (in particular what belongs under
# `if self.all:`) cannot be determined from here.
class ForEachPerson(multiprocessing.Process):
"""This class contains the funcs for the main processing."""
# Stores the two queues and the pass-through arguments on the instance.
# The `all` parameter shadows the builtin of the same name inside __init__.
def __init__(self, year_queue, result_queue, dict_of_files, all, today):
multiprocessing.Process.__init__(self)
# Work items are read from this queue in run().
self.work_queue = year_queue
# Completion messages are written to this queue in run().
self.result_queue = result_queue
# Loop flag for run(); nothing in the visible code ever sets it to True.
self.kill_received = False
# Looked up with the key "null_pids" in run().
self.dict = dict_of_files
self.all = all
self.today = today
# Process entry point: loops until the work queue reports empty.
def run(self):
while not self.kill_received:
try:
# Non-blocking read: an empty queue raises Queue.Empty immediately.
# NOTE(review): the multiprocessing docs warn that get_nowait() can raise
# Queue.Empty for a short time after put() on an empty queue, so a worker
# that starts early may leave here without taking any work — confirm
# whether that explains workers that finish immediately.
year = self.work_queue.get_nowait()
year_start_date = year[0]
year_end_date = year[1]
# Reads the .year attribute of the end date; `split` is not used again in
# the visible code.
split = year_end_date.year
except Queue.Empty:
# No work available: close this process's handle on the result queue and
# end the worker.
self.result_queue.close()
return
if self.all:
try:
null_pids = self.dict["null_pids"]
except KeyError:
# Missing key is treated as "no null pids".
null_pids = []
#For each employee calculate the data and write to file.
today = self.today
hie = hie_util.Build()
# NOTE(review): the file is bound to `hie_op`, but the write/close calls
# below use `hierarchy_op`, which is not defined in the visible code —
# confirm which name is intended.
# NOTE(review): every worker opens the same literal path "output.csv" in
# "wb" mode — confirm this is intended.
hie_op = open("output.csv", "wb")
hierarchy_op.write("....\n")
/* do function */
............
hierarchy_op.close()
# Completion message: current timestamp plus the str() of the work item.
timestr = ("%s End writing for %s"
% (str(datetime.datetime.now()), str(year)))
# The value placed on the result queue is this single string.
self.result_queue.put(timestr)
def Manage(years, dict_of_files, num_processes, all, today):
    """Responsible for creating & assigning tasks to worker processes.

    Args:
        years: Sequence of work items; each one is put on the work queue
            unchanged.
        dict_of_files: Passed through unchanged to every ForEachPerson worker.
        num_processes: Upper bound on the number of workers; capped at
            len(years).
        all: Passed through unchanged to every worker. The name shadows the
            builtin and is kept only for caller compatibility.
        today: Passed through unchanged to every worker.

    Returns:
        A list of the result messages posted by the workers, in arrival
        order. It holds len(years) entries unless every worker exited
        before that many results were posted, in which case it is shorter
        and an error is logged.
    """
    # Seconds to wait for a result before checking that workers still exist.
    poll_seconds = 1

    # load up year queue
    year_queue = multiprocessing.Queue()
    for year in years:
        year_queue.put(year)

    # Never start more workers than there are work items.
    num_processes = min(num_processes, len(years))

    # queue to pass to workers to store the results
    result_queue = multiprocessing.Queue()

    # spawn workers
    workers = []
    for i in range(num_processes):
        worker = ForEachPerson(year_queue, result_queue, dict_of_files, all, today)
        logging.info("Worker spawned for processor " + str(i + 1))
        worker.start()
        workers.append(worker)

    # collect results off the queue
    logging.info("results being collected")
    results = []
    while len(results) < len(years):
        try:
            # A bounded wait lets us notice when no worker is left to post
            # a result; an unbounded get() would block forever in that case.
            result = result_queue.get(timeout=poll_seconds)
        except Queue.Empty:
            if any(worker.is_alive() for worker in workers):
                continue
            # Every worker has exited. Give the queue one more chance to
            # deliver anything that was posted just before the last exit.
            try:
                result = result_queue.get(timeout=poll_seconds)
            except Queue.Empty:
                logging.error(
                    "All workers exited after %d of %d results",
                    len(results), len(years))
                break
        # Workers post a single message string, so log and keep it whole
        # rather than indexing into its characters.
        logging.info(str(result))
        results.append(result)

    for count, worker in enumerate(workers):
        logging.info("Terminating worker: " + str(count))
        worker.terminate()
        # Reap the child so it does not linger as a zombie process.
        worker.join()
    return results
def RunHie():
    """Main control flow for building.

    Calls hie_sql.SQLExportImport().RunEtl(), gathers the date list and the
    input files, then passes them to Manage with one worker per CPU minus
    one, but never fewer than one worker.
    """
    logging.info("Start ")
    sql_instance = hie_sql.SQLExportImport()
    sql_instance.RunEtl()
    # gather list of dates
    date_full_list = DailyDates()
    dict_of_files = ReadFiles()
    # calculate hierarchy - run
    # Leave one CPU free, but keep at least one worker: on a single-CPU host
    # cpu_count() - 1 is 0, which would start no workers and leave Manage
    # waiting for results that never arrive.
    num_processes = max(1, multiprocessing.cpu_count() - 1)
    # NOTE(review): `today` is not defined inside this function; presumably a
    # module-level sequence — confirm.
    results = Manage(date_full_list, dict_of_files, num_processes, 0, today[1])
    logging.info("End")