结合异步 HTTP 请求将数据写入文件

时间:2017-09-27 18:50:12

标签: python asynchronous file-writing

下面这段代码是我根据 here(原帖中的链接)里的示例修改而来的:

import asyncio
import time
from aiohttp import ClientPayloadError
from aiohttp import ClientSession

# Module-wide counter that fetch() uses to number the progress lines it prints.
COUNTER = 1

async def fetch(url, session):
    """Download *url* through *session* and return the response body as text.

    A numbered progress line (sequence number, DATE header, final URL and
    DELAY header) is printed for every response received.

    Returns:
        The decoded body, or ``None`` when reading the payload fails with
        ``ClientPayloadError`` (the failing URL is printed in that case).
    """
    global COUNTER

    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        # Take the number first and bump afterwards so that numbering starts
        # at the counter's initial value instead of skipping it.
        number = COUNTER
        COUNTER += 1
        print("{}. {}:{} with delay {}".format(number, date, response.url, delay))
        try:
            return await response.text()
        except ClientPayloadError:
            # The placeholder is required; without it the URL is dropped
            # from the message.
            print("ERROR: {}".format(url))
            return None


async def bound_fetch(sem, url, session):
    """Run ``fetch(url, session)`` while holding the semaphore *sem*.

    The semaphore caps how many fetches are in flight at once.

    Returns:
        Whatever ``fetch`` returns, so callers (e.g. ``asyncio.gather``)
        actually receive the downloaded data instead of ``None``.
    """
    async with sem:
        return await fetch(url, session)


async def run():
    """Fetch every URL built from ``load_ids()`` concurrently.

    All requests share one ``ClientSession`` and one semaphore that allows
    at most 1000 of them to run at the same time.

    Returns:
        A list with one entry per URL, in the same order as the ids returned
        by ``load_ids()``; each entry is the value produced by
        ``bound_fetch`` for that URL.
    """
    urls = [build_url(seq_id) for seq_id in load_ids()]
    sem = asyncio.Semaphore(1000)

    # A single client session is reused so that a new connection is not
    # opened for each request.
    async with ClientSession(conn_timeout=10000, read_timeout=10000) as session:
        # Pass the semaphore and the session to every GET request.
        tasks = [
            asyncio.ensure_future(bound_fetch(sem, url, session))
            for url in urls
        ]
        # Hand the gathered results back to the caller rather than
        # discarding them, so they can be written out afterwards.
        return await asyncio.gather(*tasks)

def build_url(id):
    """Return the Refseq annotation-sequence API URL for the given id."""
    template = 'http://api.metagenomics.anl.gov/annotation/sequence/{}?source=Refseq'
    return template.format(id)

def load_ids():
    """Return the list of ids to download.

    Placeholder: the "real" code is meant to load a file here and extract
    the ids from it.
    """
    raw_ids = """
    mgm4558908.3
    mgm4484962.3
    mgm4734169.3
    mgm4558911.3
    mgm4484983.3
    mgm4558918.3
    """
    # Splitting on arbitrary whitespace already ignores the leading and
    # trailing blank lines and the indentation.
    return raw_ids.split()


# Use a monotonic wall-clock timer: time.clock() was removed in Python 3.8
# and reported CPU time on Unix, which says little about an I/O-bound run.
start = time.perf_counter()
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run())
loop.run_until_complete(future)
# Elapsed time is "now - start"; subtracting the other way round would
# report a negative duration.
run_time = (time.perf_counter() - start) / 60
print("this took: {} minutes".format(run_time))

我知道可以用 print(await response.text()) 打印响应数据,但我对异步代码不熟悉,因此不清楚应该如何以及在何处打开文件并写入。我担心其中涉及某种线程,多个任务同时写入同一文件时可能会出问题(我比较熟悉的是多进程)。

1 个答案:

答案 0 :(得分:1)

async 既不是多进程,也不是多线程。在你的情况下,可以尝试类似这样的写法:

with open(file, "w"):
    async for s in run():
        f.write(s)

此外,您还可以尝试使用 aiofiles 或 curio 进行异步文件 I/O。