For a large number of coroutines, I am trying to periodically pickle the done and todo tasks. Sample code:
import asyncio
import pandas as pd
import numpy as np
import pickle
import random
import time
from typing import Callable, Coroutine

async def coro():
    dates = pd.date_range('20130101', periods=6)
    df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
    await asyncio.sleep(random.randint(0, 15))
    return df

async def todo_tasks():
    for i in range(500):
        todo.add(asyncio.create_task(coro(), name='coro' + str(i)))

async def progressAsync(algo: Callable[..., Coroutine],
                        timeout=2) -> None:
    task = asyncio.create_task(algo(), name=algo.__name__)
    todo.add(task)  # add the launcher task to the todo set
    start = time.time()
    while len(todo):
        # checkpoint every `timeout` seconds
        done, pending = await asyncio.wait(todo, timeout=timeout)
        # remove done tasks from todo, collect them in result, and pickle the state
        todo.difference_update(done)
        result.update(done)
        # ...the following does not work!!!
        with open('./result.pkl', 'wb') as f:
            pickle.dump(list(result), f)
        with open('./todo.pkl', 'wb') as f:
            pickle.dump(list(todo), f)
        # report pending tasks
        pending_names = (t.get_name() for t in todo)
        print(f"{len(todo)}: " + " ".join(sorted(pending_names))[-75:])
    # success!
    end = time.time()
    print(f"Took {int(end - start)} seconds")

todo = set()
result = set()
asyncio.run(progressAsync(algo=todo_tasks))
The program runs fine except for the pickle.dump part, which raises: TypeError: cannot pickle 'coroutine' object

Is there a way to save/serialize the intermediate results? The goal is to restart progressAsync for the remaining todo tasks and to use the results separately for further processing. See the sketch below for the kind of checkpoint I have in mind.
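For context, here is roughly what I was hoping to achieve: instead of pickling the Task objects themselves, dump the already-computed DataFrame results (keyed by task name) and only the names of the still-pending tasks, so a later run could recreate the remaining coroutines from those names. This is only a sketch of my intent; checkpoint_state and the file names are placeholders, not part of the code above.

import pickle

def checkpoint_state(done_tasks, pending_tasks,
                     result_path='./result.pkl', todo_path='./todo.pkl'):
    # Hypothetical checkpoint: pickle plain results and pending task names
    # rather than the (unpicklable) Task/coroutine objects.
    # Task.result() returns the DataFrame produced by coro(); DataFrames pickle fine.
    results = {t.get_name(): t.result() for t in done_tasks}
    with open(result_path, 'wb') as f:
        pickle.dump(results, f)
    # Only the names survive; the coroutines would have to be recreated on restart.
    pending_names = [t.get_name() for t in pending_tasks]
    with open(todo_path, 'wb') as f:
        pickle.dump(pending_names, f)

Would something along these lines be a reasonable direction, or is there a better pattern for checkpointing asyncio tasks?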