我在这里(here)给出的代码基础上修改,得到了下面这段代码:
import asyncio
import time
from aiohttp import ClientPayloadError
from aiohttp import ClientSession
# Module-level request counter: fetch() increments it and prints it alongside
# each response.
COUNTER = 1
async def fetch(url, session):
    """Request *url* through *session* and return the response body as text.

    Prints one progress line per response (sequence number, the DATE header,
    the response URL and the DELAY header) and advances the module-level
    ``COUNTER``.

    Args:
        url: Address to request.
        session: Object whose ``get(url)`` is an async context manager that
            yields the response (an aiohttp ``ClientSession`` in this file).

    Returns:
        The body text, or ``None`` when reading the payload raised
        ``ClientPayloadError``.
    """
    global COUNTER
    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        # Print before incrementing so the numbering starts at COUNTER's
        # initial value instead of skipping it.
        print("{}. {}:{} with delay {}".format(COUNTER, date, response.url, delay))
        COUNTER += 1
        try:
            return await response.text()
        except ClientPayloadError:
            # The original format string had no placeholder, so the failing
            # URL was silently dropped from the message.
            print("ERROR: {}".format(url))
            return None
async def bound_fetch(sem, url, session):
    """Run ``fetch(url, session)`` while holding *sem* and return its result.

    Args:
        sem: Async context manager (an ``asyncio.Semaphore`` in this file)
            that limits how many fetches run at once.
        url: Address to request.
        session: Client session passed through to ``fetch``.

    Returns:
        Whatever ``fetch`` returns for *url*.
    """
    async with sem:
        # Propagate the result; without `return` every gathered task
        # resolves to None and the response bodies are lost.
        return await fetch(url, session)
async def run():
    """Request every URL built from ``load_ids()`` and return the results.

    All requests share one ``ClientSession`` and are throttled by a
    semaphore created with a limit of 1000.

    Returns:
        A list with one entry per URL, in URL order, holding whatever
        ``bound_fetch`` returned for that URL.
    """
    urls = [build_url(id_) for id_ in load_ids()]
    # Create the semaphore that bounds the number of concurrent requests.
    sem = asyncio.Semaphore(1000)
    # One client session for all requests, so a new connection is not opened
    # per request.
    # NOTE(review): conn_timeout/read_timeout are deprecated in newer aiohttp
    # releases in favour of timeout=ClientTimeout(...) — confirm against the
    # installed version before changing.
    async with ClientSession(conn_timeout=10000, read_timeout=10000) as session:
        # Pass the semaphore and the session to every GET request.
        tasks = [
            asyncio.ensure_future(bound_fetch(sem, url, session))
            for url in urls
        ]
        # Return the gathered results so callers can consume them (e.g. write
        # them to a file); the original awaited them and discarded the list.
        return await asyncio.gather(*tasks)
def build_url(id):
    """Return the Refseq sequence-annotation URL for the given id."""
    template = 'http://api.metagenomics.anl.gov/annotation/sequence/{}?source=Refseq'
    return template.format(id)
def load_ids():
    """Return the hard-coded sample ids as a list of strings."""
    # In the "real" code a file will be loaded here and the ids extracted.
    sample_ids = (
        "mgm4558908.3",
        "mgm4484962.3",
        "mgm4734169.3",
        "mgm4558911.3",
        "mgm4484983.3",
        "mgm4558918.3",
    )
    return list(sample_ids)
# Script driver: run the crawl on the event loop and report the elapsed time.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# replacement for measuring elapsed time.
start = time.perf_counter()
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run())
loop.run_until_complete(future)
# Elapsed time is end minus start; the original subtracted in the opposite
# order and reported a negative duration.
run_time = (time.perf_counter() - start) / 60
print("this took: {} minutes".format(run_time))
我知道我可以使用以下方式打印响应数据:print(await response.text())
但是我不熟悉异步代码,所以弄不清楚应该如何以及在何处打开文件并写入。我担心其中涉及某种线程机制,在同时写入同一个文件时可能会引发问题(我熟悉的是多进程)。
答案 0 :(得分:1)
async 既不是多进程,也不是多线程。在你的情况下,可以尝试类似下面的写法:
with open(file, "w"):
async for s in run():
f.write(s)
此外,您还可以尝试使用 aiofiles 或 curio 来进行文件的异步 I/O。