我正在使用 pymongo 3.2,并且想在多进程(multiprocessing)环境中使用它,但我收到了错误。我的代码如下:
# Question's script: build one MongoClient in the parent process, then hand
# every (jdId, itemId) pair from `sample_data` to a process pool.
# connect=False defers connecting until the first operation (per PyMongo docs).
client = MongoClient(JD_SEARCH_MONGO_URI, connect=False)
db = client.jd_search

with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
    # Both loops scan the same collection, so each document's jdId is paired
    # with each document's itemId.
    for jd in db['sample_data'].find():
        jdId = jd["jdId"]
        for cv in db["sample_data"].find():
            itemId = cv["itemId"]
            # Queue the work on the pool; the returned Future is not kept.
            executor.submit(intersect_compute, jdId, itemId)
            # print "done {} => {}".format(jdId, itemId)
如您所见,我已根据文档将 connect 设置为 False,但仍然收到如下警告:
UserWarning: MongoClient opened before fork. Create MongoClient with connect=False, or create client after forking. See PyMongo's documentation for details: http://api.mongodb.org/python/current/faq.html#using-pymongo-with-multiprocessing>
答案 0 :(得分:0)
您的做法与文档中的示例几乎完全一样(只是 URL 不同),但对应的却是文档里标注为 “Never do this” 的那一部分。
附注:我在本回答的末尾附上了更新后的代码示例。
# Recommended pattern: the MongoClient is constructed inside the function
# that runs in the child process.
# Each process creates its own instance of MongoClient.
def func():
    db = pymongo.MongoClient().mydb
    # Do something with db.


# Run func in a separate process.
proc = multiprocessing.Process(target=func)
proc.start()
# Anti-pattern: the client is created at module level in the parent process
# and then referenced from the function that runs in the child.
client = pymongo.MongoClient()


# Each child process attempts to copy a global MongoClient
# created in the parent process. Never do this.
def func():
    db = client.mydb
    # Do something with db.


# Run func in a separate process.
proc = multiprocessing.Process(target=func)
proc.start()
您需要做的修改是:把数据库连接的初始化移到每个进程 fork 之后再进行,这样每个进程都会拥有各自独立的连接。
# Answer's revised script: the MongoClient is now created inside the `with`
# block, after the ProcessPoolExecutor has been constructed.
with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
    # NOTE(review): the body of this `with` still executes in the parent
    # process, so `client` and `db` are parent-side objects. If
    # intersect_compute reads them as globals, this looks like the
    # "Never do this" pattern; presumably intersect_compute should open its
    # own MongoClient. Confirm against its definition.
    client = MongoClient(JD_SEARCH_MONGO_URI, connect=False)
    db = client.jd_search
    # Both loops scan the same collection, so each document's jdId is paired
    # with each document's itemId.
    for jd in db['sample_data'].find():
        jdId = jd["jdId"]
        for cv in db["sample_data"].find():
            itemId = cv["itemId"]
            # Queue the work on the pool; the returned Future is not kept.
            executor.submit(intersect_compute, jdId, itemId)
            # print "done {} => {}".format(jdId, itemId)