Scrapy 1.3.3。有时返回的 response 是 None。
Traceback (most recent call last):
  File "/home/vetos/venvs/aliber/lib/python3.5/site-packages/twisted/internet/defer.py", line 1301, in _inlineCallbacks
    result = g.send(result)
  File "/home/vetos/venvs/aliber/lib/python3.5/site-packages/scrapy/core/downloader/middleware.py", line 47, in process_response
    assert response is not None, 'Received None in process_response'
AssertionError: Received None in process_response
我有一个中间件:
class CustomProcessProxyResponse(object):
    """Downloader middleware that retries proxy-related failures.

    On a retryable HTTP status or network exception it removes the failing
    proxy from the database and re-schedules a copy of the request.
    """

    # Network-level errors that indicate a bad/dead proxy worth retrying.
    EXCEPTIONS_TO_RETRY = (defer.TimeoutError, TimeoutError, DNSLookupError,
                           ConnectionRefusedError, ConnectionDone, ConnectError,
                           ConnectionLost, TCPTimedOutError, ResponseFailed,
                           IOError, TunnelError)

    # HTTP statuses that trigger a retry with a fresh proxy.
    # (The original list contained 408 twice; the duplicate is removed.)
    RETRY_HTTP_CODES = [500, 502, 503, 504, 408, 403, 401, 400, 404]

    def __init__(self, settings):
        # Settings are accepted for interface compatibility but unused.
        pass

    @classmethod
    def from_crawler(cls, crawler):
        """Standard Scrapy factory hook."""
        return cls(crawler.settings)

    def process_response(self, request, response, spider):
        """Retry requests whose response status is in RETRY_HTTP_CODES."""
        # NOTE(review): Scrapy's middleware manager asserts that `response`
        # is never None *before* calling this method — the AssertionError in
        # the traceback is raised by an earlier middleware in the chain
        # returning None from its process_response, so this guard is
        # defensive only and cannot fire here.
        if response is None:
            reason = 'Response is None; Request {}'.format(request)
            return self._retry(request, reason, spider) or response
        if response.status in self.RETRY_HTTP_CODES:
            reason = response_status_message(response.status)
            return self._retry(request, reason, spider) or response
        return response

    def process_exception(self, request, exception, spider):
        """Retry only on the network errors listed in EXCEPTIONS_TO_RETRY.

        Returning None for any other exception lets the remaining
        middlewares (and errbacks) handle it, instead of silently
        swallowing every failure as the original code did.
        """
        if isinstance(exception, self.EXCEPTIONS_TO_RETRY):
            return self._retry(request, exception, spider)

    def _retry(self, request, reason, spider):
        """Drop the failing proxy and return a retry copy of the request."""
        # .get() avoids a KeyError for requests that were scheduled
        # without a proxy (which would itself abort the download chain).
        proxy = request.meta.get('proxy')
        logger.debug("Gave up fail %(request)s (failed %(proxy)s ): %(reason)s",
                     {'request': request, 'proxy': proxy, 'reason': reason},
                     extra={'spider': spider})
        if proxy is not None:
            # NOTE(review): the original spelled this call with a Cyrillic
            # homoglyph ("remove_from_databаse"), which would raise
            # AttributeError at runtime — normalized to ASCII here; confirm
            # against ProxyManager's actual method name.
            ProxyManager().remove_from_database(proxy)
        # request.copy() does not carry over custom attributes, hence the
        # explicit productId re-assignment.
        retryreq = request.copy()
        retryreq.dont_filter = True
        retryreq.productId = request.productId
        return retryreq
settings.py:
# Downloader middleware chain: the proxy assigner runs early
# (priority 100, before Scrapy's built-ins) and the response/retry
# handler runs late (990), after responses have come back.
DOWNLOADER_MIDDLEWARES = {
    'aliexpress_affiliate_api.middlewares.CustomProcessProxyResponse': 990,
    'aliexpress_affiliate_api.middlewares.CustomProxyMiddleware': 100,
}
class CustomProxyMiddleware(object):
    """Downloader middleware that attaches a random proxy (and its
    country) to each request of the product-detail spider."""

    def process_request(self, request, spider):
        # Only the detail-extender spider is proxied; for all other
        # spiders this falls through (returning None lets the request
        # proceed untouched).
        if spider.name == AliProductDetailExtender.name:
            proxy_manager = ProxyManager()
            proxy_server, country = proxy_manager.get_random_proxy_from_database()
            request.meta['proxy'] = proxy_server
            request.meta['proxy_country'] = country
            # Use the spider's logger rather than print() so the message
            # respects Scrapy's logging configuration and log levels.
            spider.logger.debug('Using proxy %s', proxy_server)
该中间件负责拦截错误并重试请求,但有时仍会发生上述错误。当 response 为 None 时应该如何拦截处理?