API调用未异步运行

时间:2018-09-15 15:49:32

标签: python python-3.x python-asyncio

我正在编写一个 Python 客户端,用来异步下载黑胶唱片封面。我的问题是,我是 Python(尤其是异步 Python)的新手,并且我认为我的代码并没有异步运行。我还有另一个用 Node.js 编写的客户端,它每秒大约可以获取 40 张封面,而这个 Python 版本只能达到每秒 1.5 张。

import aiohttp
import asyncio

from os import path,makedirs

# Root endpoint of the Cover Art Archive release API; a release MBID is
# appended to it to fetch that release's image listing.
caa_base_url = "https://coverartarchive.org/release"
# Directory the downloaded covers are written under. A relative 'images' is
# resolved to an absolute path against the current working directory.
image_download_dir = path.realpath('images')
# Desired image size: "small", "large", or None for the maximum size.
# NOTE(review): not referenced by any code visible in this file - confirm it
# is still needed.
image_size = None
async def getImageUrls(release_mbid,session):
  """Fetch the Cover Art Archive listing for a single release.

  Returns ``[release_mbid, parsed_json]``, or ``None`` when the archive
  answers with HTTP 403 or 404.
  """
  listing_url = f'{caa_base_url}/{release_mbid}'
  async with session.get(listing_url) as resp:
    if resp.status in (403, 404):
      return None
    listing = await resp.json()
    return [release_mbid, listing]

async def getImage(url,session,max_retries=None):
  """Download one image and return ``[url, image_bytes]``.

  A dropped connection (``aiohttp.ServerDisconnectedError``) is retried in a
  loop instead of by recursion, so a long run of disconnects can no longer
  grow the call chain until the interpreter's recursion limit is hit.

  Args:
    url: Address of the image to download.
    session: Open aiohttp client session used to issue the request.
    max_retries: Number of retries allowed after a disconnect. ``None``
      (the default) keeps retrying indefinitely, matching the previous
      behaviour.

  Returns:
    A two-item list ``[url, body]`` where ``body`` is the raw response bytes.

  Raises:
    aiohttp.ServerDisconnectedError: Only when ``max_retries`` is given and
      has been exhausted.
  """
  retries_done = 0
  while True:
    try:
      async with session.get(url) as resp:
        return [url, await resp.read()]
    except aiohttp.ServerDisconnectedError:
      # Give up only if the caller asked for a bound; otherwise try again.
      if max_retries is not None and retries_done >= max_retries:
        raise
      retries_done += 1

async def _saveFrontImages(release_mbid, result, session):
  """Download every front cover listed in ``result`` and write it to disk.

  Files are stored as ``<image_download_dir>/<release_mbid>/<file name>``,
  where the file name is the last path segment of the image URL. Nothing is
  created when the release has no front image.
  """
  front_urls = [image["image"] for image in result["images"] if image["front"]]
  if not front_urls:
    return

  image_responses = await asyncio.gather(
    *(getImage(front_url, session) for front_url in front_urls)
  )

  new_file_dir = path.join(image_download_dir, release_mbid)
  # exist_ok avoids the check-then-create race of a separate isdir() test.
  makedirs(new_file_dir, exist_ok=True)
  for image_url, image_binary in image_responses:
    file_name = image_url[image_url.rfind("/")+1:]
    # The context manager guarantees the handle is flushed and closed.
    with open(path.join(new_file_dir, file_name), 'wb') as new_file:
      new_file.write(image_binary)


async def getMBIDs(mb_page_url):
  """Download the front covers of every release on one MusicBrainz page.

  The page at ``mb_page_url`` is fetched and parsed as JSON, the Cover Art
  Archive listing of each entry in its ``"releases"`` array is requested
  concurrently, and then the front images of all releases are downloaded
  concurrently (rather than one release after another) and saved to disk.

  Args:
    mb_page_url: URL of a MusicBrainz release search page returning JSON.

  Returns:
    None. The downloaded images are written below ``image_download_dir``.
  """
  # One session is shared by all requests so its connection pool is reused.
  async with aiohttp.ClientSession() as session:
    async with session.get(mb_page_url) as resp:
      mb_json = await resp.json()

    responses = await asyncio.gather(
      *(getImageUrls(release["id"], session) for release in mb_json["releases"])
    )

    # getImageUrls yields None for releases without a listing; skip those.
    await asyncio.gather(
      *(
        _saveFrontImages(response[0], response[1], session)
        for response in responses
        if response is not None
      )
    )

# Root endpoint of the MusicBrainz release search web service.
mb_base_url = "https://musicbrainz.org/ws/2/release"
# Number of result pages to walk, and how many releases to request per page.
num_pages = 100
releases_per_page = 100

# NOTE(review): never appended to or read in the code visible here -
# presumably a leftover; confirm before removing.
mb_page_urls = []

async def getMBPages():
  """Process ``num_pages`` MusicBrainz result pages one after another.

  Each page is handed to ``getMBIDs``; a one-second pause follows every page
  before the next one is requested.
  """
  url_template = '%s?query=*&type=album&format=Vinyl&limit=%s&offset=%s&fmt=json'
  page_number = 0
  while page_number < num_pages:
    offset = page_number * releases_per_page
    page_url = url_template % (mb_base_url, releases_per_page, offset)
    await getMBIDs(page_url)
    await asyncio.sleep(1)
    page_number += 1

# Create the event loop explicitly: relying on asyncio.get_event_loop() to
# create one implicitly is deprecated and fails on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
  # Run the whole download job to completion.
  loop.run_until_complete(getMBPages())
finally:
  # Always release the loop's resources, even if the job raised.
  loop.close()

P.S. 代码中的 sleep 是因为 MusicBrainz API 限制为每秒 1 个请求。

0 个答案:

没有答案