我想实现一个并行的 requests.get() 函数:该函数处理请求队列并将结果放入列表中,列表填充完成后再由普通的顺序代码处理。我尝试了下面的代码,但程序无法结束,也不打印任何 ID。
import requests
from queue import Queue
from threading import Thread
# Size of the worker-thread pool.
num_threads = 10

# Pieces of the request URL: BASE + KB_ENDPOINT + <ID> + FORMAT.
BASE = "http://www.uniprot.org"
KB_ENDPOINT = "/uniprot/"
FORMAT = ".xml"

# Input queue (IDs waiting to be fetched) and output queue (fetched results).
ID_q, ID_data = Queue(), Queue()
# worker function
def get_ID_data(ID_q, ID_data, BASE, KB_ENDPOINT, FORMAT, timeout=30):
    """Worker loop: download the entry for every ID taken from ``ID_q``.

    The loop never returns, so it is meant to run in a daemon thread.

    Args:
        ID_q: queue of ID strings that still have to be fetched.
        ID_data: queue that receives one ``requests`` response object per
            successfully fetched ID.
        BASE: first part of the request URL.
        KB_ENDPOINT: path part of the request URL.
        FORMAT: suffix appended after the ID.
        timeout: seconds passed to ``requests.get`` so that a stalled
            connection cannot block the worker indefinitely.
    """
    while True:
        ID = ID_q.get()
        try:
            print(ID)
            url = BASE + KB_ENDPOINT + ID + FORMAT
            ID_data.put(requests.get(url, timeout=timeout))
        except requests.RequestException as err:
            # Report the failure and keep the worker alive for the next ID.
            print('{} failed: {}'.format(ID, err))
        finally:
            # Always acknowledge the item; otherwise ID_q.join() in the main
            # thread would wait forever after a failed request.
            ID_q.task_done()
        # ID_data.task_done() is deliberately not called here: task_done()
        # belongs to whoever consumes ID_data, not to the producer.
# initialize worker
# Start the worker pool. daemon=True (instead of the deprecated
# Thread.setDaemon) lets the interpreter exit even though the workers loop
# forever.
for _ in range(num_threads):
    worker = Thread(
        target=get_ID_data,
        args=(ID_q, ID_data, BASE, KB_ENDPOINT, FORMAT),
        daemon=True,
    )
    worker.start()

# Load IDs and put them in the input queue.
ID_list = ["A6ZMA9", "N1P5E6",
           "H0GM11", "H0GZ91",
           "A0A0L8VK54", "G2WKA0",
           "C8ZEQ4", "B5VPH8",
           "B3LLU5", "C7GL72",
           "J8QFS9", "J8Q1C1",
           "A0A0L8RDV1"]
for ID in ID_list:
    ID_q.put(ID)

# Block until the workers have acknowledged every queued ID.
ID_q.join()

# Work with ID_data: move the responses into a plain list so that ordinary
# sequential code can process them. Printing the Queue object itself would
# only show its repr, not the responses.
results = []
while not ID_data.empty():
    results.append(ID_data.get_nowait())
print(results)
更新:我参照 @pkqxdd 的答案,使用 asyncio 和 aiohttp 将代码改成了下面这样:
import asyncio,aiohttp
# Pieces of the request URL: BASE + KB_ENDPOINT + <ID> + FORMAT.
BASE = "http://www.uniprot.org"
KB_ENDPOINT = "/uniprot/"
FORMAT = ".xml"

# Entry IDs to download, one per line.
IDs = [
    "A6ZMA9",
    "N1P5E6",
    "H0GM11",
    "H0GZ91",
    "A0A0L8VK54",
    "G2WKA0",
    "C8ZEQ4",
    "B5VPH8",
    "B3LLU5",
    "C7GL72",
    "J8QFS9",
    "J8Q1C1",
    "A0A0L8RDV1",
]
async def get_data_coroutine(session, ID):
    """Fetch the document for ``ID`` and return its body as text.

    Args:
        session: an open aiohttp client session used to issue the request.
        ID: entry identifier inserted into the URL
            ``BASE + KB_ENDPOINT + ID + FORMAT``.

    Returns:
        The response body as a string.

    Raises:
        NameError: if the server answers with an HTTP error status or with
            an empty body.
    """
    async with session.get(BASE + KB_ENDPOINT + ID + FORMAT) as response:
        res = await response.text()
        print(ID)
        # An error response usually still carries a non-empty body (an error
        # page), so the status code has to be checked as well as the body.
        if response.status >= 400 or not res:
            raise NameError('{} is not available'.format(ID))
        return res
async def main(loop=None):
    """Download every ID in ``IDs`` concurrently and return the bodies.

    Args:
        loop: accepted only for backward compatibility and no longer used.
            Passing ``loop`` to ``aiohttp.ClientSession`` is deprecated; a
            session created inside a coroutine is bound to the running loop.

    Returns:
        A list with one response body per ID, in the same order as ``IDs``.
    """
    async with aiohttp.ClientSession() as session:
        tasks = [get_data_coroutine(session, ID) for ID in IDs]
        return await asyncio.gather(*tasks)
# Create the loop explicitly: calling asyncio.get_event_loop() at module level
# with no running loop is deprecated in recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    result = loop.run_until_complete(main(loop))
finally:
    # Release the loop's resources once all downloads have finished.
    loop.close()
答案 0 :(得分:2)
由于您已经提到了异步,所以我假设您使用的是Python3.6或更高版本。
requests 库实际上并不支持异步编程,试图让它异步化是条死胡同。更好的做法是改用 aiohttp。
您可以使用以下简单代码来实现自己的目标:
import asyncio,aiohttp
# Pieces of the request URL: BASE + KB_ENDPOINT + <ID> + FORMAT.
BASE = "http://www.uniprot.org"
KB_ENDPOINT = "/uniprot/"
FORMAT = ".xml"

# Entry IDs to download, one per line.
ID_list = [
    "A6ZMA9",
    "N1P5E6",
    "H0GM11",
    "H0GZ91",
    "A0A0L8VK54",
    "G2WKA0",
    "C8ZEQ4",
    "B5VPH8",
    "B3LLU5",
    "C7GL72",
    "J8QFS9",
    "J8Q1C1",
    "A0A0L8RDV1",
]
# The client session is opened inside main(), i.e. inside a running coroutine,
# and closed again when main() finishes. Creating it at import time and never
# closing it is what produces the aiohttp warnings.
session = None


async def get_data(ID):
    """Fetch the document for ``ID`` and return its body as text.

    Uses the module-level ``session``, which main() sets for the duration of
    the downloads.
    """
    async with session.get(BASE + KB_ENDPOINT + ID + FORMAT) as response:
        return await response.text()


async def main():
    """Download every ID in ``ID_list`` concurrently.

    Returns:
        A list with one response body per ID, in the same order as
        ``ID_list``.
    """
    global session
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(get_data(ID) for ID in ID_list))


# Create the loop explicitly: calling asyncio.get_event_loop() at module level
# with no running loop is deprecated in recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    fut = loop.create_task(main())
    loop.run_until_complete(fut)
    print(fut.result())
finally:
    # Release the loop's resources once all downloads have finished.
    loop.close()
(是的,我看到了警告。但是我真的不想使答案更复杂。您应该对其进行更改以更好地满足您的目的。)