使用python asyncio尽可能快地发出数千个HTTP请求

时间:2018-04-05 18:19:20

标签: python-3.6 python-asyncio aiohttp

我需要编写一个程序,从大量 API 端点收集数据并对其做一些处理。下面是一个模拟程序,它尝试尽可能快地发出 10000 个请求。欢迎提出任何改进建议(尤其是速度方面的)。实验表明,将信号量上限设为 100 左右时速度最佳。

import asyncio
from aiohttp import ClientSession
import datetime
import time
import sys

def processData(data):
    """Stand in for per-response processing: block briefly, then return *data* unchanged.

    Args:
        data: Any payload; it is passed through untouched.

    Returns:
        The same object that was passed in.
    """
    # A 1 ms blocking sleep is the entire "work" performed here.
    simulated_cost_seconds = 0.001
    time.sleep(simulated_cost_seconds)
    return data

async def fetch(url, session):
    """Issue a GET for *url* on *session* and return the processed response body.

    Args:
        url: Address passed straight to ``session.get``.
        session: Object whose ``get(url)`` yields an async context manager
            exposing an awaitable ``read()`` (an aiohttp ``ClientSession``
            in this script).

    Returns:
        Whatever ``processData`` returns for the body that was read.
    """
    async with session.get(url) as resp:
        body = await resp.read()
        # processData is an ordinary synchronous call, so it executes here
        # while the response context is still open.
        return processData(body)

async def bound_fetch(sem, url, session):
    """Run ``fetch(url, session)`` while holding *sem*.

    Args:
        sem: Async context manager used as the concurrency gate (an
            ``asyncio.Semaphore`` in this script).
        url: Forwarded to ``fetch``.
        session: Forwarded to ``fetch``.

    Returns:
        The value produced by ``fetch``.
    """
    async with sem:
        fetched = await fetch(url, session)
    # By this point the gate has been released, just as it would be when
    # returning from inside the ``async with`` body.
    return fetched


async def run(loop,N):
    """Schedule *N* gated fetches of a fixed URL and wait for all of them.

    All requests share one ``ClientSession`` and one ``asyncio.Semaphore``
    initialised to 100. Progress and the elapsed wall-clock time of the
    gather phase are printed to stdout.

    Args:
        loop: Event loop used to create the tasks.
        N: Number of requests to schedule.

    Returns:
        The list produced by ``asyncio.gather`` over the N tasks.
    """
    target = "https://www.example.com"
    gate = asyncio.Semaphore(100)
    async with ClientSession() as http:
        # Every task is created up front; the semaphore, not task creation,
        # limits how many fetches proceed at once.
        pending = [
            loop.create_task(bound_fetch(gate, target, http))
            for _ in range(N)
        ]

        print("Done starting {} tasks".format(N))
        # The timer starts only after scheduling, so it measures the
        # gather phase rather than task creation.
        began = time.time()
        print(datetime.datetime.now())
        results = await asyncio.gather(*pending)
        elapsed = time.time() - began
        print("Done completing {} tasks in: {}".format(N, elapsed))

        return results



# Module-level handles: the raw argument vector and the event loop that the
# script entry point below drives.
args = sys.argv
loop = asyncio.get_event_loop()

if __name__ == "__main__":
    # Exactly one command-line argument is read as the request count;
    # any other argument count falls back to 10000.
    N = int(sys.argv[1]) if len(sys.argv) == 2 else 10000
    maintask = loop.create_task(run(loop, N))
    result = loop.run_until_complete(maintask)
    # Report how many responses were collected.
    print(len(result))

0 个答案:

没有答案