Python asyncio and aiohttp slow down after 150+ requests

Asked: 2018-01-15 00:46:29

Tags: python performance python-asyncio aiohttp

I am using asyncio and aiohttp to build an asynchronous scraper. For some reason it starts to slow down once I pass roughly 150 requests. The first asyncio pass, where I collect the links, runs fine. The second pass, where I fetch the chapters, is where the slowdown happens: after about 200 requests a single request can take around a minute. Any idea why? Am I using asyncio or aiohttp incorrectly? The full code is below; for comparison, a minimal sketch of the usual shared-session pattern follows it.

Edit: I am running this locally on a machine with 7 GB of RAM, so I don't think I am running out of memory.

import aiohttp
import asyncio
import async_timeout
import re
from lxml import html
import timeit
from os import makedirs, chmod


basepath = ""
start = timeit.default_timer()
novel = ""
novel = re.sub(r"[^a-zA-Z0-9 ]+/", "", novel)
novel = re.sub(r" ", "-", novel)

novel_url = {}
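# Fetch a page body. Note: aiohttp.request() creates a new ClientSession
# (and a fresh connection pool) for every call.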
@asyncio.coroutine
def get(*args, **kwargs):
    response = yield from aiohttp.request('GET', *args, **kwargs)
    return (yield from response.text())

def scrape_links(page):
    # Parse the page and map each chapter label to its link
    # (XPath expressions omitted in the post).
    url = html.fromstring(page)
    links = url.xpath("")
    chapter_count = url.xpath("")
    dictionaries = dict(zip(chapter_count, links))
    novel_url.update(dictionaries)

@asyncio.coroutine
def print_links(query):
    # Make the output directories and apply chmod
    makedirs('%s/%s' % (basepath, query), exist_ok=True)
    makedirs('%s/%s/img' % (basepath, query), exist_ok=True)
    chmod('%s/%s' % (basepath, query), 0o765)
    chmod('%s/%s/img/' % (basepath, query), 0o765)

    url = 'https://www.examplesite.org/' + query
    page = yield from get(url, compress=True)
    magnet = scrape_links(page)


loop = asyncio.get_event_loop()
f = asyncio.wait([print_links(novel)])
loop.run_until_complete(f)
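# run_until_complete blocks until the link pass finishes, so novel_url is
# fully populated before the chapter pass below starts.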


##### Now fetch the chapters from the links collected above

def scrape_chapters(page, i):
    url = html.fromstring(page)
    title = url.xpath("")
    title = ''.join(title)
    title = re.sub(r"", "", title)
    chapter = url.xpath("")
    # Use this to join them instead of looping through, if it doesn't work in the epub maker
    # chapter = '\n'.join(chapter)
    print(title)
    # file = open("%s/%s/%s-%s.html" % (basepath, novel, novel, i), 'w+')
    # file.write("<h1>%s</h1>" % title)
    # for x in chapter:
    #     file.write("\n<p>%s</p>" % x)
    # file.close()

@asyncio.coroutine
def print_chapters(query):
    chapter = (str(query[0]))
    chapter_count = re.sub(r"CH ", "", chapter)
    page = yield from get(query[1], compress=True)
    chapter = scrape_chapters(page, chapter_count)

loop = asyncio.get_event_loop()
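# All chapter requests are scheduled at once; nothing limits how many run concurrently.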
f = asyncio.wait([print_chapters(d) for d in novel_url.items()])
loop.run_until_complete(f)

stop = timeit.default_timer()
print("\n")
print(stop - start)
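
For reference, aiohttp's documentation recommends reusing a single ClientSession (one connection pool) for many requests rather than calling aiohttp.request() once per URL, and a semaphore is a common way to bound concurrency. A minimal sketch of that pattern, written with async/await; the URL list and the concurrency limit are placeholders, not taken from the code above:

import asyncio
import aiohttp

CONCURRENCY = 20  # placeholder limit; tune for the target site

async def fetch(session, url, semaphore):
    # The semaphore caps how many requests are in flight at the same time.
    async with semaphore:
        async with session.get(url) as response:
            return await response.text()

async def fetch_all(urls):
    semaphore = asyncio.Semaphore(CONCURRENCY)
    # One shared session means one connection pool reused by every request.
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url, semaphore) for url in urls]
        return await asyncio.gather(*tasks)

loop = asyncio.get_event_loop()
pages = loop.run_until_complete(fetch_all(['https://www.examplesite.org/example'] * 5))
print(len(pages))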

0 Answers:

There are no answers yet.