我想使用multiprocessing.Pool
并行加载相当大的数据集。这是我用于实现这一目标的代码。
import pickle
import os
from os.path import join
from os import listdir
class db_loader:
def __init__(self, db_path, no_of_workers):
self.db_path = db_path
self.no_of_workers = no_of_workers
self.fp_dict = {}
def one_loader(self, the_hash):
db_path= self.db_path
the_path = join(db_path, the_hash)
with open(the_path, 'rb') as source:
self.fp_dict[the_hash] = pickle.load(source)
def pool_loader(self):
db_path = self.db_path
pool = mp.Pool(self.no_of_workers)
the_files = listdir(db_path) # a list of hashes
print(type(the_files))
print(type(self.one_loader))
pool.map(self.one_loader, the_files)
print('nothing wrong')
print(len(self.fp_dict))
t_1 = time.time()
test_loader = db_loader(db_path = db_path,
no_of_workers = 10)
test_loader.pool_loader()
print(len(test_loader.fp_dict))
但是,运行脚本后,属性fp_dict
的长度为0
。为什么multiprocessing.Pool
不会更改class属性?