我需要一些帮助来确定pymongo多处理设置出了什么问题。到目前为止,我已经尝试按照文档中的建议设置所有内容。这是我的代码的摘要(不是实际的脚本,但功能相似)。
connection.py
def multi_extract_onedata(dct_params, myclient):
with myclient:
db = myclient[dct_params['db']]
collection = db[dct_params['table']]
cursor = collection.find_one(dct_params['filter_clause'])
return cursor
def multi_extract_selectfield(dct_params, fieldlist, myclient):
projection = {field:1 for field in fieldlist}
projection['_id'] = 0
with myclient:
db = myclient[dct_params['db']]
collection = db[dct_params['table']]
cursors = collection.find(dct_params['filter_clause'], projection)
return [tuple(c.values()) for c in cursors]
main.py
import connection
import multiprocessing
from pymongo import MongoClient
def main_task():
client = MongoClient("mongodb://user:password@address:port")
dct_params = {
'db': 'database',
'table': 'table',
'filter_clause': {
'Id': 'some_id'
}
}
fields = ['fieldA', 'fieldB']
ret1 = connection.multi_extract_selectfield(dct_params, fields, client)
ret2 = None
if ret1:
ret2 = connection.multi_extract_onedata(dct_params, client)
return ret2
if __name__ == '__main__':
all_block_id = ['1', '2', '3', '4']
with multiprocessing.Pool(4) as pool:
res = [dp for dp in pool.map(main_task, all_block_id)]
with open('file.txt', 'w') as f:
for dp in res:
f.write(str(dp))
中心思想是为Pool产生的每个子进程创建一个客户端,这是我在main_task中处理的。但是,我无法使脚本成功运行。通过Ctrl-C终止后,追溯显示,对3个工作线程的最后一次调用是在multiprocessing / synchronize.py中的return self._semlock.__enter__()
,而第四个调用停留在multiprocessing / connection.py上的chunk = read(handle, remaining)
。预先感谢您提供的任何帮助!