我必须使用 multiprocessing 库,想让下面的循环并行运行:
tag = []
# Fetch every model ID
all_model_id = get_models_id()  # a list of lists
liste_all_img_id = []
# This is the loop that should be parallelised with multiprocessing
for model_row in all_model_id:
    tag = get_tags(model_row[0])  # get_tags returns a list
    # print(tag)
    for tag_row in tag:
        # get_images_id returns a list
        liste_all_img_id.append(get_images_id(tag_row[0], model_row[0]))
我尝试过:
def funcs(start, end):
    """Collect image-ID results for the models at indices ``start``..``end - 1``.

    For each index, the first field of ``all_model_id[index]`` is passed to
    ``get_tags``; ``get_images_id`` is then called once per returned tag with
    the tag's first field and that same model field.

    ``all_model_id`` is not a parameter: it is looked up as a global name.

    Returns:
        A list holding every ``get_images_id`` result, in call order.
    """
    gathered = []
    for index in range(start, end):
        tag = get_tags(all_model_id[index][0])
        gathered.extend(
            get_images_id(entry[0], all_model_id[index][0]) for entry in tag
        )
    return gathered
from multiprocessing import Pool
import multiprocessing
def main():
    """Process the model list in three index ranges, one process per range.

    The list returned by ``get_models_id`` is split into three consecutive
    ``(start, end)`` ranges; each range is handed to ``funcs`` in its own
    ``multiprocessing.Process``. All three processes are started and then
    joined before this function returns.
    """
    # NOTE(review): funcs looks up ``all_model_id`` as a global, but this
    # assignment only creates a local -- confirm a module-level
    # ``all_model_id`` exists for the worker processes to read.
    all_model_id = get_models_id()
    len_all_model_id = len(all_model_id)
    div_total = len_all_model_id // 3

    # The last range ends exactly at the list length: it absorbs the
    # remainder of the division without indexing past the final element
    # (the previous "+ 1" made funcs read all_model_id[len] -> IndexError).
    bounds = [
        (0, div_total),
        (div_total, div_total * 2),
        (div_total * 2, len_all_model_id),
    ]

    # NOTE(review): Process does not hand the target's return value back to
    # the parent, so the lists built by funcs are lost here; a Pool (or a
    # Queue) is needed to collect them.
    list_threads = [
        multiprocessing.Process(target=funcs, name="", args=chunk)
        for chunk in bounds
    ]
    for worker in list_threads:
        worker.start()
    for worker in list_threads:
        worker.join()


# Guard the entry point so that importing this module (as multiprocessing
# child processes may do) does not start the workers again.
if __name__ == "__main__":
    main()
但是:
我不确定主函数定义是否正确
我不知道如何存储结果
答案 0 :(得分:1)
def funcs(start, end):
    """Return the image-ID results for models in the index range [start, end).

    Each model's first field is fed to ``get_tags``; every tag's first field
    is then paired with that model field in a call to ``get_images_id``.
    The model list is read from the global name ``all_model_id``.

    Returns:
        A list with one ``get_images_id`` result per (model, tag) pair,
        ordered by model index and then by tag position.
    """
    image_results = []
    for position in range(start, end):
        tag = get_tags(all_model_id[position][0])
        for tag_item in tag:
            image_results.append(
                get_images_id(tag_item[0], all_model_id[position][0])
            )
    return image_results
from multiprocessing.pool import Pool
def main():
    """Run ``funcs`` over three index ranges in a 3-worker pool and print results.

    The list returned by ``get_models_id`` is split into three consecutive
    ``(start, end)`` ranges. Each range is submitted with ``apply_async``;
    the results are then fetched with ``get()`` in submission order and
    printed.
    """
    # NOTE(review): funcs looks up ``all_model_id`` as a global, but this
    # assignment only creates a local -- confirm a module-level
    # ``all_model_id`` exists for the worker processes to read.
    all_model_id = get_models_id()
    len_all_model_id = len(all_model_id)
    div_total = len_all_model_id // 3

    # The last range ends exactly at the list length: it absorbs the
    # remainder of the division without indexing past the final element
    # (the previous "+ 1" made funcs read all_model_id[len] -> IndexError).
    bounds = [
        (0, div_total),
        (div_total, div_total * 2),
        (div_total * 2, len_all_model_id),
    ]

    with Pool(3) as pool:
        # submit 3 tasks without blocking
        results = [pool.apply_async(funcs, args=chunk) for chunk in bounds]
        # now await 3 results:
        for result in results:
            print(result.get())


# Guard the entry point so that importing this module (as multiprocessing
# child processes may do) does not create another pool.
if __name__ == "__main__":
    main()
请注意,apply_async 带有一个可选的回调参数,您可以通过它指定一个函数,该函数会在结果(任务的实际返回值)可用时立即被调用,但调用顺序不一定与任务的提交顺序一致。上面获取结果的方法(即依靠 apply_async 返回的结果对象,对其进行阻塞的 get 调用)将始终按照任务提交顺序获得结果,starmap 函数也是如此;如果您已将所有任务的调用参数放在一个可迭代对象(例如列表或元组)中,那么 starmap 是一个合理的替代方案:
with Pool(3) as pool:
    # starmap unpacks each tuple into funcs(start, end) and returns the
    # results in the same order as the argument list.
    results = pool.starmap(funcs, [
        (0, div_total),
        (div_total, div_total*2),
        # End at div_total*3 + rest_div_total (no "+ 1"): with
        # div_total = len // 3 and rest_div_total = len % 3 this is the list
        # length, so the last chunk does not index past the final element.
        (div_total*2, div_total*3 + rest_div_total),
    ])
    for result in results:
        print(result)
我也很喜欢 concurrent.futures 模块,但希望对您的程序进行最少的更改。不过请注意,您还可以使用未写入文档但一直存在的 ThreadPool 类,它与 multiprocessing 的 Pool 类接口兼容,只需使用:
from multiprocessing.pool import ThreadPool
代替
from multiprocessing.pool import Pool
,然后指定:
with ThreadPool(3) as pool:
如果您的任务是 I/O 密集型的,那么使用线程可能是更好的选择。
答案 1 :(得分:0)
我稍微修改了您的代码,但还没有运行过。不过我认为这段代码可以解决您的问题。如果有任何地方对您不起作用,请告诉我。
from multiprocessing import Pool
import multiprocessing as mp
def funcs(tupl):
    """Collect image-ID results for the index range packed in ``tupl``.

    Args:
        tupl: An indexable pair; ``tupl[0]`` is the first model index and
            ``tupl[1]`` is the exclusive end index.

    For every index in that range, the first field of the model entry
    (read from the global name ``all_model_id``) is passed to ``get_tags``,
    and ``get_images_id`` is called for each returned tag with the tag's
    first field and the model field.

    Returns:
        A list of all ``get_images_id`` results, in call order.
    """
    first = tupl[0]
    stop = tupl[1]
    collected = []
    for idx in range(first, stop):
        tag = get_tags(all_model_id[idx][0])
        collected += [
            get_images_id(tag_entry[0], all_model_id[idx][0])
            for tag_entry in tag
        ]
    return collected
def main():
    """Run ``funcs`` over three index ranges in a 3-process pool.

    The list returned by ``get_models_id`` is split into three consecutive
    ``(start, end)`` tuples, which are mapped over ``funcs`` with
    ``Pool.map``.

    Returns:
        The list produced by ``pool.map``: one ``funcs`` result per range,
        in the same order as the ranges.
    """
    # NOTE(review): funcs looks up ``all_model_id`` as a global, but this
    # assignment only creates a local -- confirm a module-level
    # ``all_model_id`` exists for the worker processes to read.
    all_model_id = get_models_id()
    len_all_model_id = len(all_model_id)
    div_total = len_all_model_id // 3

    # The last range ends exactly at the list length: it absorbs the
    # remainder of the division without indexing past the final element
    # (the previous "+ 1" made funcs read all_model_id[len] -> IndexError).
    lst_args = [
        (0, div_total),
        (div_total, div_total * 2),
        (div_total * 2, len_all_model_id),
    ]

    # The context manager shuts the pool down once map() has returned.
    with mp.Pool(processes=3) as pool:
        # Pass lst_args: the original referenced an undefined "list_args".
        res = pool.map(funcs, lst_args)
    return res  # you can loop through res to get your results


# Guard the entry point so that importing this module (as multiprocessing
# child processes may do) does not create another pool.
if __name__ == "__main__":
    main()
答案 2 :(得分:0)
使用concurrent.futures module试试。
ThreadPoolExecutor(max_workers = 10)
(您可以指定最大工人数)。
此外,如果您想使用多个进程而不是线程,只需将 ThreadPoolExecutor 替换为 ProcessPoolExecutor。
# Imported here because the snippet uses concurrent.futures below but never
# brought it into scope.
import concurrent.futures

tag = []
all_model_id = get_models_id()
liste_all_img_id = []


def func(model_id):
    """Return the ``get_images_id`` results for every tag of one model.

    Args:
        model_id: One entry of ``all_model_id``; only its first field is used.

    Returns:
        A list with one ``get_images_id`` result per tag returned by
        ``get_tags`` for this model, in tag order.
    """
    model_key = model_id[0]
    return [
        get_images_id(tag_row[0], model_key) for tag_row in get_tags(model_key)
    ]


with concurrent.futures.ThreadPoolExecutor() as executor:
    # Consume the map() iterator: results arrive in input order, and an
    # exception raised inside a worker is re-raised here instead of being
    # silently dropped. Results are gathered in this thread rather than
    # appended to the shared list from inside the workers.
    for image_ids in executor.map(func, all_model_id):
        liste_all_img_id.extend(image_ids)