I'm writing a script to make millions of API calls in parallel.
I'm using Python 3.6 with aiohttp for this. I expected uvloop to make it faster, but it seems to be making it slower. Am I doing something wrong?

With uvloop: 22 seconds
Without uvloop: 15 seconds
import asyncio
import aiohttp
import uvloop
import time
import logging

from aiohttp import ClientSession, TCPConnector

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()

urls = ["http://www.yahoo.com", "http://www.bbcnews.com", "http://www.cnn.com", "http://www.buzzfeed.com",
        "http://www.walmart.com", "http://www.emirates.com", "http://www.kayak.com", "http://www.expedia.com",
        "http://www.apple.com", "http://www.youtube.com"]
bigurls = 10 * urls

def run(enable_uvloop):
    try:
        if enable_uvloop:
            loop = uvloop.new_event_loop()
        else:
            loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        start = time.time()
        conn = TCPConnector(limit=5000, use_dns_cache=True, loop=loop, verify_ssl=False)
        with ClientSession(connector=conn) as session:
            tasks = asyncio.gather(*[asyncio.ensure_future(do_request(url, session)) for url in bigurls])  # tasks to do
            results = loop.run_until_complete(tasks)  # loop until done
            end = time.time()
            logger.debug('total time:')
            logger.debug(end - start)
            return results
        loop.close()
    except Exception as e:
        logger.error(e, exc_info=True)

async def do_request(url, session):
    """
    """
    try:
        async with session.get(url) as response:
            resp = await response.text()
            return resp
    except Exception as e:
        logger.error(e, exc_info=True)

run(True)
# run(False)
Answer 0 (score: 0)
I tried a similar experiment and found no real difference between the uvloop and default asyncio event loops for parallel HTTP GETs:
asyncio event loop: avg=3.6285968542099 s. stdev=0.5583842811362075 s.
uvloop event loop: avg=3.419699764251709 s. stdev=0.13423859428541632 s.
uvloop may make a noticeable difference when it is used in server code, i.e. when handling many incoming requests.
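To illustrate that server-side point, here is a minimal sketch of my own (not part of the benchmark below) of an aiohttp server running under uvloop; the route, handler name, and port are arbitrary examples:

# Hypothetical sketch: an aiohttp server with uvloop installed as the event loop.
import asyncio
import uvloop
from aiohttp import web

async def handle(request):
    # Return a small JSON payload for each incoming request.
    return web.json_response({"path": request.path})

def main():
    # Swap in uvloop's event loop policy before the server's loop is created.
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    app = web.Application()
    app.router.add_get("/", handle)
    web.run_app(app, port=8080)

if __name__ == "__main__":
    main()

A request-heavy workload like this, driven by a load generator, is where the faster event loop would be expected to show up, if anywhere.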
The benchmark code:
import time
from statistics import mean, stdev
import asyncio
import uvloop
import aiohttp

urls = [
    'https://aws.amazon.com', 'https://google.com', 'https://microsoft.com', 'https://www.oracle.com/index.html',
    'https://www.python.org', 'https://nodejs.org', 'https://angular.io', 'https://www.djangoproject.com',
    'https://reactjs.org', 'https://www.mongodb.com', 'https://reinvent.awsevents.com',
    'https://kafka.apache.org', 'https://github.com', 'https://slack.com', 'https://authy.com',
    'https://cnn.com', 'https://fox.com', 'https://nbc.com', 'https://www.aljazeera.com',
    'https://fly4.emirates.com', 'https://www.klm.com', 'https://www.china-airlines.com',
    'https://en.wikipedia.org/wiki/List_of_Unicode_characters', 'https://en.wikipedia.org/wiki/Windows-1252'
]

def timed(func):
    # Decorator that returns the coroutine's wall-clock duration in seconds.
    async def wrapper():
        start = time.time()
        await func()
        return time.time() - start
    return wrapper

@timed
async def main():
    conn = aiohttp.TCPConnector(use_dns_cache=False)
    async with aiohttp.ClientSession(connector=conn) as session:
        coroutines = [fetch(session, url) for url in urls]
        await asyncio.gather(*coroutines)

async def fetch(session, url):
    async with session.get(url) as resp:
        await resp.text()

asyncio_results = [asyncio.run(main()) for i in range(10)]
print(f'asyncio event loop: avg={mean(asyncio_results)} s. stdev={stdev(asyncio_results)} s.')

# Change to uvloop
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
uvloop_results = [asyncio.run(main()) for i in range(10)]
print(f'uvloop event loop: avg={mean(uvloop_results)} s. stdev={stdev(uvloop_results)} s.')
Answer 1 (score: 0)
aiohttp recommends using aiodns.
Also, as I recall, this line:
with ClientSession(connector=conn) as session:
should be asynchronous: async with ClientSession(connector=conn) as session:
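A minimal sketch of what both suggestions might look like together (this is my own illustration, assuming aiodns is installed so that aiohttp's AsyncResolver can be used):

# Hypothetical sketch combining both suggestions; assumes `pip install aiodns`.
import asyncio
import aiohttp
from aiohttp.resolver import AsyncResolver

async def fetch(session, url):
    async with session.get(url) as resp:
        return await resp.text()

async def fetch_all(urls):
    # AsyncResolver uses aiodns for non-blocking DNS lookups.
    resolver = AsyncResolver()
    conn = aiohttp.TCPConnector(resolver=resolver)
    # Enter the session with `async with`, not a plain `with`.
    async with aiohttp.ClientSession(connector=conn) as session:
        return await asyncio.gather(*(fetch(session, url) for url in urls))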
Answer 2 (score: 0)
You are not alone; I actually got similar results (which is what led me to search for an explanation and brought me here).
My experiment involved running 500 concurrent GET requests to Google.com using aiohttp.
Here is the code for reference:
import asyncio, aiohttp, concurrent.futures
from datetime import datetime
import uvloop

class UVloopTester():
    def __init__(self):
        self.timeout = 20
        self.threads = 500
        self.totalTime = 0
        self.totalRequests = 0

    @staticmethod
    def timestamp():
        return f'[{datetime.now().strftime("%H:%M:%S")}]'

    async def getCheck(self):
        async with aiohttp.ClientSession() as session:
            response = await session.get('https://www.google.com', timeout=self.timeout)
            response.close()
            await session.close()
        return True

    async def testRun(self, id):
        now = datetime.now()
        try:
            if await self.getCheck():
                elapsed = (datetime.now() - now).total_seconds()
                print(f'{self.timestamp()} Request {id} TTC: {elapsed}')
                self.totalTime += elapsed
                self.totalRequests += 1
        except concurrent.futures._base.TimeoutError:
            print(f'{self.timestamp()} Request {id} timed out')

    async def main(self):
        await asyncio.gather(*[asyncio.ensure_future(self.testRun(x)) for x in range(self.threads)])

    def start(self):
        # comment these lines to toggle
        uvloop.install()
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        loop = asyncio.get_event_loop()
        now = datetime.now()
        loop.run_until_complete(self.main())
        elapsed = (datetime.now() - now).total_seconds()
        print(f'{self.timestamp()} Main TTC: {elapsed}')
        print()
        print(f'{self.timestamp()} Average TTC per Request: {self.totalTime / self.totalRequests}')

        if len(asyncio.Task.all_tasks()) > 0:
            for task in asyncio.Task.all_tasks():
                task.cancel()
            try:
                loop.run_until_complete(asyncio.gather(*asyncio.Task.all_tasks()))
            except asyncio.CancelledError:
                pass

        loop.close()

test = UVloopTester()
test.start()
I haven't planned and executed a careful experiment where I record my findings and calculate standard deviations and p-values, but I have run this a (tiring) number of times and came up with the following results.

Running without uvloop:
Running with uvloop:

I shared this code with a friend of mine, the one who actually suggested I try uvloop (because he gets a speed boost from it). After running it several times, his results confirmed that he does in fact see a speed increase from using uvloop (shorter average times for both main() and the individual requests).
Our findings lead me to believe the differences have to do with our setups: I'm using a Debian virtual machine with 8 GB of RAM on a mid-range laptop, while he uses a local Linux desktop with a lot more "muscle" under the hood.
My answer to your question is: no, I don't believe you are doing anything wrong, because I ran into the same results and I don't appear to be doing anything wrong either, though constructive criticism is welcome and appreciated.
I wish I could be of more help; I hope my chiming in is of some use.