I'm reading a 40 MB file (one million lines) of URLs and visiting each one to look for a marker string on the page. When I run it, the requests always seem to stall or block, and I can't tell what they are waiting for. The response to the request? The code is a concurrency example I found on GitHub and then modified. Could you help me see where the problem is? Thanks!
#!/usr/bin/python3
# encoding: utf-8
# time : 2019/6/1 20:31
import asyncio
import aiofiles
from aiohttp import ClientSession, ClientTimeout
try:
import uvloop
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
except ImportError:
pass
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
}
def iter_list():
all_url = []
with open("Domain_list.txt") as input_file:
for line in islice(input_file, 0, None):
all_url.append(line.strip())
return all_url
async def save_txt(url):
async with aiofiles.open('ok.txt', 'a+') as f:
await f.write(url + '\n')
async def fetch(item, session):
    try:
        # The original passed timeout=None, which disables aiohttp's
        # timeout entirely, so a slow or unresponsive server blocks
        # the task forever. A bounded ClientTimeout (60 s here is an
        # assumed value; tune as needed) lets stuck requests fail.
        async with session.post(url=item, headers=headers,
                                timeout=ClientTimeout(total=60)) as req:
            # Look for the marker '<<<EOT' and save the URL if present.
            code = '<<<EOT'
            html = await req.text()
            if req.status in [200, 201]:
                # find() returns -1 when missing; the original "> 2"
                # test would skip a marker appearing at index 0-2.
                index = html.find(code)
                if index != -1:
                    await save_txt(item)
                    print(item, index)  # print() returns None and is not awaitable
                else:
                    print(f'{item} not found!')
            else:
                # status is a property, not a method: req.status, not req.status()
                print(f'{item},{req.status}')
    except Exception as e:
        # The bare "except: pass" silently swallowed every error,
        # including the TypeError raised by "await print(...)".
        print(f'{item} failed: {e!r}')
async def bound_fetch(sem, url, session):
    # Acquire the semaphore so at most 500 fetches run concurrently.
async with sem:
await fetch(url, session)
async def run(lists):
tasks = []
# create instance of Semaphore
sem = asyncio.Semaphore(500)
    # Create a single client session so we don't open a new
    # connection for every request.
async with ClientSession() as session:
for i in lists:
            # pass the semaphore and session to every request (fetch uses POST)
task = asyncio.ensure_future(bound_fetch(sem, i, session))
tasks.append(task)
        await asyncio.gather(*tasks)
loop = asyncio.get_event_loop()
lists = iter_list()
# run_until_complete accepts a coroutine directly; the extra
# ensure_future wrapper was redundant.
loop.run_until_complete(run(lists))
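
One more thing worth flagging: iter_list() loads all one million URLs into a list and run() creates a Task for every one of them up front, so even though the semaphore allows only 500 requests in flight, a million Task objects sit in memory at once. Below is a minimal sketch of a batched alternative; run_batched and batch_size are names I've made up, and it reuses the fetch coroutine above:

# Sketch: read Domain_list.txt lazily and schedule at most batch_size
# tasks at a time instead of one task per line up front.
import asyncio
from itertools import islice
from aiohttp import ClientSession

async def run_batched(path, batch_size=500):
    async with ClientSession() as session:
        with open(path) as input_file:
            while True:
                # islice pulls the next batch_size lines without
                # loading the whole file into memory.
                batch = [line.strip() for line in islice(input_file, batch_size)]
                if not batch:
                    break
                await asyncio.gather(*(fetch(url, session) for url in batch))

loop = asyncio.get_event_loop()
loop.run_until_complete(run_batched("Domain_list.txt"))

Since batch_size already caps the number of in-flight requests, the semaphore becomes unnecessary here; the trade-off is that each batch waits for its slowest request before the next one starts, which stays bounded once the request timeout above is in place.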