我需要编写一个程序,从大量 API 端点收集数据并对其做一些处理。下面是一个模拟程序,它尝试尽可能快地发出 10000 个请求。欢迎提出任何改进建议(尤其是速度方面)。实验表明,将信号量上限设为 100 左右时速度最佳。
import asyncio
from aiohttp import ClientSession
import datetime
import time
import sys
def processData(data):
    """Simulate a short, blocking post-processing step on one payload.

    Args:
        data: The payload to process. It is not inspected or modified.

    Returns:
        The very same object that was passed in.
    """
    # Stand-in for real work: blocks the calling thread for about 1 ms.
    time.sleep(0.001)
    return data
async def fetch(url, session):
    """GET *url* through *session* and return the processed response body.

    Args:
        url: Address to request.
        session: Object whose ``get(url)`` yields, as an async context
            manager, a response exposing an awaitable ``read()``.

    Returns:
        The result of ``processData`` applied to the raw response body.
    """
    async with session.get(url) as response:
        data = await response.read()

    # The body is fully read at this point, so processing happens after the
    # response context has been exited instead of keeping it open.
    #
    # processData blocks its calling thread; running it directly here would
    # stall the event loop (and every other in-flight request) for the whole
    # duration. Hand it to the loop's default executor so the loop stays free.
    # Inside a coroutine, get_event_loop() returns the running loop.
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(None, processData, data)
async def bound_fetch(sem, url, session):
    """Run ``fetch(url, session)`` while holding one slot of *sem*.

    Args:
        sem: Async context manager used as the concurrency limiter.
        url: Address passed through to ``fetch``.
        session: Session object passed through to ``fetch``.

    Returns:
        Whatever ``fetch`` returns.
    """
    # Waits here until a slot is free; the slot is given back when the
    # ``async with`` block exits, including when fetch raises.
    async with sem:
        return await fetch(url, session)
async def run(loop, N, url="https://www.example.com", concurrency=100):
    """Fire *N* GET requests at *url* concurrently and collect the results.

    Args:
        loop: Event loop used to schedule the per-request tasks.
        N: Number of requests to issue.
        url: Address every request is sent to.
        concurrency: Upper bound on requests in flight at the same time
            (size of the semaphore shared by all tasks).

    Returns:
        A list with one entry per request, in task-creation order.

    Raises:
        Exception: The first error raised by any request, re-raised after
            the remaining tasks have been cancelled and awaited.
    """
    # NOTE(review): aiohttp's default connector documents its own cap on
    # simultaneous connections (100 by default), so raising ``concurrency``
    # alone may not increase throughput - confirm against the aiohttp docs.
    sem = asyncio.Semaphore(concurrency)

    async with ClientSession() as session:
        tasks = [
            loop.create_task(bound_fetch(sem, url, session))
            for _ in range(N)
        ]
        print("Done starting {} tasks".format(N))

        # perf_counter is meant for measuring intervals; unlike time.time()
        # it is unaffected by wall-clock adjustments.
        starttime = time.perf_counter()
        print(datetime.datetime.now())

        try:
            responses = await asyncio.gather(*tasks)
        except Exception:
            # gather re-raises the first failure but leaves the sibling tasks
            # running. Cancel and drain them before the session is closed so
            # none of them keeps using a closed session or is left pending.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
            raise

        elapsed = time.perf_counter() - starttime
        print("Done completing {} tasks in: {}".format(N, elapsed))
    return responses
args = sys.argv

if __name__ == "__main__":
    # The request count is the single optional command-line argument;
    # any other argument count falls back to 10000.
    if len(args) == 2:
        N = int(args[1])
    else:
        N = 10000

    # Create the loop explicitly and only when run as a script: calling
    # asyncio.get_event_loop() with no running loop is deprecated in newer
    # Python releases, and doing it at import time creates a loop that is
    # never closed.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        maintask = loop.create_task(run(loop, N))
        result = loop.run_until_complete(maintask)
        print(len(result))
    finally:
        # Release the loop's resources even if a request failed.
        loop.close()