我有一个访问链接的列表。长清单:
fake_list = [...] #It does contains links
我必须逐个访问这些链接,检查它们是死链接(404)还是有效链接(200)。如果我顺序遍历每个链接并逐一发出请求,几个请求之后就会失败,所以我决定改用多线程——这里用的是线程池——对于中等规模的列表(大约1-8个链接)它是可行的。请参阅以下代码:
# BUG FIX: the original had `import futures` (a py2 backport package) but the
# code below references the stdlib `concurrent.futures` namespace — it would
# fail with NameError. Import the stdlib module instead.
import concurrent.futures

# Fan the URL checks out over a small thread pool; check_url builds a
# (url, status, access_date) tuple per link (per the surrounding text).
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
    results = list(pool.map(self.check_url, fake_list))
该代码使用'check_url'方法映射每个链接,该方法根据链接特征(url,status,access_date)创建元组。
即使我增加了工作线程的数量,如果'fake_list'包含很多链接,它也无法正常工作。 :(
我的问题是:
如何处理更大的容量,从而能够'询问'更多链接它们是否仍然有效?
亲切的问候。
###################################################################
我认为问题在于我所提供的链接本身。如果我用谷歌的网址生成100个断开的链接,下面的代码就能顺利运行:
async def fetch(url):
    """Issue a GET request for *url* and return the response object.

    NOTE(review): the response is returned after the ``async with`` block
    exits, so its connection is already released — status/headers remain
    readable afterwards, but the body does not (hence the commented-out
    ``read()`` below).
    """
    conn = aiohttp.TCPConnector(verify_ssl=False)
    async with aiohttp.request('get', url, connector=conn) as resp:
        return resp
        # return await resp.read()
async def run(loop, r):
    """Check *r* generated URLs concurrently and print the live results.

    loop -- unused; kept so existing callers keep working.
    r    -- how many URLs to generate from the template below.
    """
    url = "https://google.com/{}" # IMPORTANT!!!!!
    tasks = [asyncio.ensure_future(fetch(url.format(i))) for i in range(r)]
    # BUG FIX: without return_exceptions=True a single failed request
    # (refused connection, TLS error, ...) re-raises out of gather() and
    # aborts every other task — exactly the ClientResponseError traceback
    # below. With it, failures come back as exception objects instead.
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)
    responses = []
    for outcome in outcomes:
        if isinstance(outcome, Exception):
            # Dead/unreachable link: report it instead of crashing the run.
            print("request failed: {}".format(outcome))
        else:
            responses.append(outcome)
    print_responses(responses)
def print_responses(result):
    """Print each response's repr, status, url, headers and DATE header,
    separated by a blank-line gap — identical output to the original loop."""
    for resp in result:
        print(resp,
              "status: {}".format(resp.status),
              "url: {}".format(resp.url),
              "headers: {}".format(resp.headers),
              "date: {}".format(resp.headers['DATE']),
              "\n\n",
              sep="\n")
# Drive the event loop: schedule run() and block until every check finishes.
event_loop = asyncio.get_event_loop()
main_task = asyncio.ensure_future(run(event_loop, 100))
event_loop.run_until_complete(main_task)
当网址指向 google.com/xxxx 时运行正常;但当我改用此链接:
url = "https://torrentz.eu/{}"
它确实失败了,结果如下:
Traceback (most recent call last):
File "C:\Users\Sebas\Desktop\Entura\entura_code_test_seb\entura_code_test\linkchecker\corelogic\linkchecker - copia.py", line 59, in <module>
loop.run_until_complete(future)
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\base_events.py", line 387, in run_until_complete
return future.result()
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\futures.py", line 274, in result
raise self._exception
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\tasks.py", line 241, in _step
result = coro.throw(exc)
File "C:\Users\Sebas\Desktop\Entura\entura_code_test_seb\entura_code_test\linkchecker\corelogic\linkchecker - copia.py", line 43, in run
responses = await asyncio.gather(*tasks)
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\futures.py", line 361, in __iter__
yield self # This tells Task to wait for completion.
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\tasks.py", line 296, in _wakeup
future.result()
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\futures.py", line 274, in result
raise self._exception
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\tasks.py", line 241, in _step
result = coro.throw(exc)
File "C:\Users\Sebas\Desktop\Entura\entura_code_test_seb\entura_code_test\linkchecker\corelogic\linkchecker - copia.py", line 32, in fetch
async with aiohttp.request('get', url, connector=connector) as response:
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\site-packages\aiohttp\client.py", line 537, in __aenter__
self._resp = yield from self._coro
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\site-packages\aiohttp\client.py", line 197, in _request
raise aiohttp.ClientResponseError() from exc
aiohttp.errors.ClientResponseError
Traceback (most recent call last):
File "C:\Users\Sebas\Desktop\Entura\entura_code_test_seb\entura_code_test\linkchecker\corelogic\linkchecker - copia.py", line 59, in <module>
loop.run_until_complete(future)
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\base_events.py", line 387, in run_until_complete
return future.result()
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\futures.py", line 274, in result
raise self._exception
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\tasks.py", line 241, in _step
result = coro.throw(exc)
File "C:\Users\Sebas\Desktop\Entura\entura_code_test_seb\entura_code_test\linkchecker\corelogic\linkchecker - copia.py", line 43, in run
responses = await asyncio.gather(*tasks)
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\futures.py", line 361, in __iter__
yield self # This tells Task to wait for completion.
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\tasks.py", line 296, in _wakeup
future.result()
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\futures.py", line 274, in result
raise self._exception
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\asyncio\tasks.py", line 241, in _step
result = coro.throw(exc)
File "C:\Users\Sebas\Desktop\Entura\entura_code_test_seb\entura_code_test\linkchecker\corelogic\linkchecker - copia.py", line 32, in fetch
async with aiohttp.request('get', url, connector=connector) as response:
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\site-packages\aiohttp\client.py", line 537, in __aenter__
self._resp = yield from self._coro
File "C:\Users\Sebas\AppData\Local\Programs\Python\Python35-32\lib\site-packages\aiohttp\client.py", line 197, in _request
raise aiohttp.ClientResponseError() from exc
aiohttp.errors.ClientResponseError
>>>
似乎服务器拒绝了我对该网站的所有请求???
这里有什么想法吗?
谢谢!