我正在使用 asyncio/aiohttp 向不同的网站发送异步 GET 请求。计划是从 redis 队列中取出 100 个网址并异步发送 GET 请求,然后再取出另外 100 个网址并重复此过程。此外,如果某个 URL 请求失败(超时或 HTTP_status == 403),程序会将其重新添加到队列末尾。我已经编写了一段代码来实现这个目标,但它运行一段时间后会冻结。谁能告诉我应该如何实现?这是我的代码:
import asyncio
from aiohttp import ClientSession
import async_timeout
import aiohttp
import aiosocks
import redis
import json
# Module-level list. The functions below each work on their own local variable
# or parameter named ``url_list``, so nothing in the visible code reads this binding.
url_list = []
async def fetch(url, session, r_server):
    """Send one GET request for *url* and read the response's HTTP status.

    Args:
        url: Address to request.
        session: Client session used to issue the request; ``run`` passes an
            aiohttp ``ClientSession``.
        r_server: Redis client handed down by the caller. The visible part of
            this function does not use it; presumably it is meant for the
            elided "store status code" step -- confirm.

    Raises:
        asyncio.TimeoutError: If the request does not finish within 100 seconds.
        Exception: Whatever ``session.get`` raises is propagated to the caller.
    """
    agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    headers = {
        'user-Agent': agent,
        'accept-Language': 'en-US,en;q=0.8',
        # Adjacent literals are concatenated, so this is a single header value;
        # a string literal may not simply continue onto the next line.
        'accept': (
            'text/javascript, application/javascript, '
            'application/ecmascript, application/x-ecmascript, */*; q=0.01'
        ),
        'accept-Encoding': 'gzip, deflate, sdch, br',
        'x-requested-with': 'XMLHttpRequest',
    }
    # The timeout must be entered with ``async with`` inside a coroutine;
    # current async-timeout releases no longer accept a plain ``with``.
    async with async_timeout.timeout(100):
        async with session.get(url, headers=headers) as response:
            status = response.status
            # Store status code somewhere
            ...
async def bound_fetch(sem, url, session, r_server):
    """Run ``fetch`` for *url* while holding *sem*, reporting any failure.

    Args:
        sem: Async context manager limiting how many fetches run at once;
            ``run`` passes an ``asyncio.Semaphore``.
        url: Address to request.
        session: Client session forwarded to ``fetch``.
        r_server: Redis client forwarded to ``fetch``.

    Any ``Exception`` raised by ``fetch`` is caught and printed here, so it
    never propagates to the caller.
    """
    async with sem:
        try:
            await fetch(url, session, r_server)
        except Exception as exc:
            print("In semaphore", exc, url)
            # Push url in redis queue
            ...
async def run(url_list, r_server):
    """Fetch every URL in *url_list* concurrently and wait for all of them.

    Args:
        url_list: Iterable of URLs to request.
        r_server: Redis client forwarded to each ``bound_fetch`` call.
    """
    # One semaphore shared by every request in this batch.
    limiter = asyncio.Semaphore(1000)
    # A single client session for the whole batch, so a new connection
    # is not opened per request.
    async with ClientSession() as session:
        # Schedule one task per URL, each given the semaphore and the session.
        pending = [
            asyncio.ensure_future(bound_fetch(limiter, url, session, r_server))
            for url in url_list
        ]
        # Wait inside the session context so it stays open until all finish.
        await asyncio.gather(*pending)
def get_url_list(r_server):
    """Collect one batch of URLs from the queue and fetch them all.

    This is a plain function, not a coroutine. The script entry point calls it
    without ``await``; for an ``async def`` that only creates a coroutine
    object and never executes the body. The body also drives the event loop
    itself with ``run_until_complete``, which cannot be called from inside an
    already-running loop.

    Args:
        r_server: Redis client the batch is read from and that is passed on
            to ``run``.
    """
    # Get url list from redis: queue_list
    # NOTE(review): the code that reads ``queue_list`` from ``r_server`` is
    # elided in the source and must be supplied here. Each entry is expected
    # to be UTF-8 encoded JSON bytes with a "url" key.
    url_list = [json.loads(docs.decode("utf-8"))["url"] for docs in queue_list]
    loop = asyncio.get_event_loop()
    # Blocks until every request in this batch has finished.
    loop.run_until_complete(run(url_list, r_server))
if __name__ == "__main__":
    # Imported here because the module's import block does not bring in
    # ``time``, which the polling loop below needs for ``time.sleep``.
    import time

    r_server = redis.Redis("localhost")
    # Poll forever: process one batch, pause 5 seconds, then repeat.
    while True:
        get_url_list(r_server)
        time.sleep(5)
答案 0 :(得分:0)
**问题**:如何在 asyncio Python 中运行 while True?
将
loop.run_until_complete(future)
替换为
loop.run_forever(future)