I'm writing a script to make millions of API calls in parallel.
I'm using Python 3.6 with aiohttp for this. I expected uvloop to make it faster, but it seems to be making it slower. Am I doing something wrong?

With uvloop: 22 seconds
Without uvloop: 15 seconds
import asyncio
import aiohttp
import uvloop
import time
import logging

from aiohttp import ClientSession, TCPConnector

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()

urls = ["http://www.yahoo.com", "http://www.bbcnews.com", "http://www.cnn.com", "http://www.buzzfeed.com",
        "http://www.walmart.com", "http://www.emirates.com", "http://www.kayak.com", "http://www.expedia.com",
        "http://www.apple.com", "http://www.youtube.com"]
bigurls = 10 * urls

def run(enable_uvloop):
    try:
        if enable_uvloop:
            loop = uvloop.new_event_loop()
        else:
            loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        start = time.time()
        conn = TCPConnector(limit=5000, use_dns_cache=True, loop=loop, verify_ssl=False)
        with ClientSession(connector=conn) as session:
            tasks = asyncio.gather(*[asyncio.ensure_future(do_request(url, session)) for url in bigurls])  # tasks to do
            results = loop.run_until_complete(tasks)  # loop until done
            end = time.time()
            logger.debug('total time:')
            logger.debug(end - start)
            return results
        loop.close()
    except Exception as e:
        logger.error(e, exc_info=True)

async def do_request(url, session):
    """
    """
    try:
        async with session.get(url) as response:
            resp = await response.text()
            return resp
    except Exception as e:
        logger.error(e, exc_info=True)

run(True)
# run(False)
Answer 0 (score: 0)
I tried a similar experiment and found no real difference between the uvloop and default asyncio event loops for parallel HTTP GETs:
asyncio event loop: avg=3.6285968542099 s. stdev=0.5583842811362075 s.
uvloop event loop: avg=3.419699764251709 s. stdev=0.13423859428541632 s.
uvloop may make a noticeable difference when it is used in server code, i.e. when handling many incoming requests.
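To illustrate that server-side point, here is a minimal sketch of my own (not part of the benchmark below) of an aiohttp server running under uvloop; the route, handler name, and port are arbitrary examples:

# Hypothetical sketch: an aiohttp server with uvloop installed as the event loop.
import asyncio
import uvloop
from aiohttp import web

async def handle(request):
    # Return a small JSON payload for each incoming request.
    return web.json_response({"path": request.path})

def main():
    # Swap in uvloop's event loop policy before the server's loop is created.
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    app = web.Application()
    app.router.add_get("/", handle)
    web.run_app(app, port=8080)

if __name__ == "__main__":
    main()

A request-heavy workload like this, driven by a load generator, is where the faster event loop would be expected to show up, if anywhere.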
The benchmark code:
import time
from statistics import mean, stdev
import asyncio
import uvloop
import aiohttp

urls = [
    'https://aws.amazon.com', 'https://google.com', 'https://microsoft.com', 'https://www.oracle.com/index.html',
    'https://www.python.org', 'https://nodejs.org', 'https://angular.io', 'https://www.djangoproject.com',
    'https://reactjs.org', 'https://www.mongodb.com', 'https://reinvent.awsevents.com',
    'https://kafka.apache.org', 'https://github.com', 'https://slack.com', 'https://authy.com',
    'https://cnn.com', 'https://fox.com', 'https://nbc.com', 'https://www.aljazeera.com',
    'https://fly4.emirates.com', 'https://www.klm.com', 'https://www.china-airlines.com',
    'https://en.wikipedia.org/wiki/List_of_Unicode_characters', 'https://en.wikipedia.org/wiki/Windows-1252'
]

def timed(func):
    # Decorator that returns the coroutine's wall-clock duration in seconds.
    async def wrapper():
        start = time.time()
        await func()
        return time.time() - start
    return wrapper

@timed
async def main():
    conn = aiohttp.TCPConnector(use_dns_cache=False)
    async with aiohttp.ClientSession(connector=conn) as session:
        coroutines = [fetch(session, url) for url in urls]
        await asyncio.gather(*coroutines)

async def fetch(session, url):
    async with session.get(url) as resp:
        await resp.text()

asyncio_results = [asyncio.run(main()) for i in range(10)]
print(f'asyncio event loop: avg={mean(asyncio_results)} s. stdev={stdev(asyncio_results)} s.')

# Change to uvloop
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
uvloop_results = [asyncio.run(main()) for i in range(10)]
print(f'uvloop event loop: avg={mean(uvloop_results)} s. stdev={stdev(uvloop_results)} s.')
Answer 1 (score: 0)
aiohttp recommends using aiodns.
Also, as I recall, this line:
with ClientSession(connector=conn) as session:
should be asynchronous: async with ClientSession(connector=conn) as session:
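A minimal sketch of what both suggestions might look like together (this is my own illustration, assuming aiodns is installed so that aiohttp's AsyncResolver can be used):

# Hypothetical sketch combining both suggestions; assumes `pip install aiodns`.
import asyncio
import aiohttp
from aiohttp.resolver import AsyncResolver

async def fetch(session, url):
    async with session.get(url) as resp:
        return await resp.text()

async def fetch_all(urls):
    # AsyncResolver uses aiodns for non-blocking DNS lookups.
    resolver = AsyncResolver()
    conn = aiohttp.TCPConnector(resolver=resolver)
    # Enter the session with `async with`, not a plain `with`.
    async with aiohttp.ClientSession(connector=conn) as session:
        return await asyncio.gather(*(fetch(session, url) for url in urls))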
Answer 2 (score: 0)
You are not alone; I actually got similar results (which is what led me to search for an explanation and brought me here).
My experiment involved running 500 concurrent GET requests to Google.com using aiohttp.
Here is the code for reference:
import asyncio, aiohttp, concurrent.futures
from datetime import datetime
import uvloop

class UVloopTester():
    def __init__(self):
        self.timeout = 20
        self.threads = 500
        self.totalTime = 0
        self.totalRequests = 0

    @staticmethod
    def timestamp():
        return f'[{datetime.now().strftime("%H:%M:%S")}]'

    async def getCheck(self):
        async with aiohttp.ClientSession() as session:
            response = await session.get('https://www.google.com', timeout=self.timeout)
            response.close()
            await session.close()
        return True

    async def testRun(self, id):
        now = datetime.now()
        try:
            if await self.getCheck():
                elapsed = (datetime.now() - now).total_seconds()
                print(f'{self.timestamp()} Request {id} TTC: {elapsed}')
                self.totalTime += elapsed
                self.totalRequests += 1
        except concurrent.futures._base.TimeoutError:
            print(f'{self.timestamp()} Request {id} timed out')

    async def main(self):
        await asyncio.gather(*[asyncio.ensure_future(self.testRun(x)) for x in range(self.threads)])

    def start(self):
        # comment these lines to toggle
        uvloop.install()
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        loop = asyncio.get_event_loop()
        now = datetime.now()
        loop.run_until_complete(self.main())
        elapsed = (datetime.now() - now).total_seconds()
        print(f'{self.timestamp()} Main TTC: {elapsed}')
        print()
        print(f'{self.timestamp()} Average TTC per Request: {self.totalTime / self.totalRequests}')

        if len(asyncio.Task.all_tasks()) > 0:
            for task in asyncio.Task.all_tasks():
                task.cancel()
            try:
                loop.run_until_complete(asyncio.gather(*asyncio.Task.all_tasks()))
            except asyncio.CancelledError:
                pass

        loop.close()

test = UVloopTester()
test.start()
I haven't planned and executed a careful experiment where I record my findings and calculate standard deviations and p-values, but I have run this a (tiring) number of times and came up with the following results.

Running without uvloop:
Running with uvloop:

I shared this code with a friend of mine, the one who actually suggested I try uvloop (because he gets a speed boost from it). After running it several times, his results confirmed that he does in fact see a speed increase from using uvloop (shorter average times for both main() and the individual requests).
Our findings lead me to believe the differences have to do with our setups: I'm using a Debian virtual machine with 8 GB of RAM on a mid-range laptop, while he uses a local Linux desktop with a lot more "muscle" under the hood.
My answer to your question is: no, I don't believe you are doing anything wrong, because I ran into the same results and I don't appear to be doing anything wrong either, though constructive criticism is welcome and appreciated.
I wish I could be of more help; I hope my chiming in is of some use.