我正在使用一个python客户端,它将异步下载乙烯基封面。我的问题是,我是python(尤其是异步python)的新手,并且我认为我的代码不会异步运行。我有另一个用Node.js编写的客户端,它可以获取大约。 40张/秒,而这个python只能达到1.5张/秒。
import aiohttp
import asyncio
from os import path,makedirs
caa_base_url = "https://coverartarchive.org/release"
image_download_dir = path.realpath('images')
# small,large, None = Max
image_size = None
async def getImageUrls(release_mbid,session):
async with session.get(f'{caa_base_url}/{release_mbid}') as resp:
if resp.status == 404 or resp.status == 403:
return
return [release_mbid,await resp.json()]
async def getImage(url,session):
try:
async with session.get(url) as resp:
return [url,await resp.read()]
except (aiohttp.ServerDisconnectedError):
return await getImage(url,session)
async def getMBIDs(mb_page_url):
async with aiohttp.ClientSession() as session:
async with session.get(mb_page_url) as resp:
mb_json = await resp.json()
tasks = []
async with aiohttp.ClientSession() as caa_session:
for release in mb_json["releases"]:
task = asyncio.ensure_future(getImageUrls(release["id"],caa_session))
tasks.append(task)
responses = await asyncio.gather(*tasks)
async with aiohttp.ClientSession() as caa_image_session:
for response in responses:
if response is not None:
caaTasks = []
release_mbid = response[0]
result = response[1]
for image in result["images"]:
if image["front"] == True:
caaTask = asyncio.ensure_future(getImage(image["image"],caa_session))
caaTasks.append(caaTask)
image_responses = await asyncio.gather(*caaTasks)
for image_response in image_responses:
image_url = image_response[0]
image_binary = image_response[1]
new_file_dir = path.join(image_download_dir,release_mbid)
if not path.isdir(new_file_dir):
makedirs(new_file_dir)
file_name = image_url[image_url.rfind("/")+1:]
file_path = path.join(new_file_dir,file_name)
new_file = open(file_path,'wb')
new_file.write(image_binary)
mb_base_url = "https://musicbrainz.org/ws/2/release"
num_pages = 100
releases_per_page = 100
mb_page_urls = []
async def getMBPages():
for page_index in range(num_pages):
await getMBIDs('%s?query=*&type=album&format=Vinyl&limit=%s&offset=%s&fmt=json' % (mb_base_url,releases_per_page,page_index*releases_per_page))
await asyncio.sleep(1)
loop = asyncio.get_event_loop()
loop.run_until_complete(getMBPages())
P.S。睡眠是因为musicbrainz api限制为每秒1个请求