这是我的代码。我正在使用BeautifulSoup。
pages = [i for i in range(300,499)]
# For every page in the interval 1-4
for page in pages:
x=ufo.loc[page,["Id"]]
x=x.values
x=str(x)[2:-2]
my_url='http://www.imdb.com/title/' + x + '/reviews?spoiler=hide&sort=helpfulnessScore&dir=desc&ratingFilter=0'
response = get(my_url)
# Pause the loop
sleep(randint(8,15))
# Monitor the requests
requests += 1
elapsed_time = time() - start_time
print('Request:{}; Frequency: {} requests/s'.format(requests, requests/elapsed_time))
clear_output(wait = True)
# Throw a warning for non-200 status codes
if response.status_code != 200:
#
warn('Request: {}; Status code: {}'.format(requests, response.status_code))
# Break the loop if the number of requests is greater than expected
if requests > 1000:
warn('Number of requests was greater than expected.')
break
page_soup = BeautifulSoup(response.text, 'html.parser')
# Parse HTML page
在100或150个请求后出现此错误。我也设置了计时器。 如果有人帮助我解决这个问题,将不胜感激。在此先感谢!
ConnectionError: HTTPSConnectionPool(host='www.imdb.com', port=443): Max retries exceeded with url: /title/tt8577370/reviews?spoiler=hide&sort=helpfulnessScore&dir=desc&ratingFilter=0 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x08F8D990>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))
更多
TimeoutError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
140 conn = connection.create_connection(
--> 141 (self.host, self.port), self.timeout, **extra_kw)
142
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
82 if err is not None:
---> 83 raise err
84
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
72 sock.bind(source_address)
---> 73 sock.connect(sa)
74 return sock
TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
345 try:
--> 346 self._validate_conn(conn)
347 except (SocketTimeout, BaseSSLError) as e:
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _validate_conn(self, conn)
849 if not getattr(conn, 'sock', None): # AppEngine might not have `.sock`
--> 850 conn.connect()
851
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
283 # Add certificate verification
--> 284 conn = self._new_conn()
285
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
149 raise NewConnectionError(
--> 150 self, "Failed to establish a new connection: %s" % e)
151
NewConnectionError: <urllib3.connection.VerifiedHTTPSConnection object at 0x08F8D990>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
439 retries=self.max_retries,
--> 440 timeout=timeout
441 )
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
638 retries = retries.increment(method, url, error=e, _pool=self,
--> 639 _stacktrace=sys.exc_info()[2])
640 retries.sleep()
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
387 if new_retry.is_exhausted():
--> 388 raise MaxRetryError(_pool, url, error or ResponseError(cause))
389
MaxRetryError: HTTPSConnectionPool(host='www.imdb.com', port=443): Max retries exceeded with url: /title/tt8577370/reviews?spoiler=hide&sort=helpfulnessScore&dir=desc&ratingFilter=0 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x08F8D990>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-177-1c2f32fb7294> in <module>()
26 print(my_url)
27 print('-------------------------')
---> 28 response = get(my_url)
29 # Pause the loop
30 sleep(randint(8,15))
C:\ProgramData\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
70
71 kwargs.setdefault('allow_redirects', True)
---> 72 return request('get', url, params=params, **kwargs)
73
74
C:\ProgramData\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
56 # cases, and look like a memory leak in others.
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
60
C:\ProgramData\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
506 }
507 send_kwargs.update(settings)
--> 508 resp = self.send(prep, **send_kwargs)
509
510 return resp
C:\ProgramData\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
638
639 # Resolve redirects if allowed.
--> 640 history = [resp for resp in gen] if allow_redirects else []
641
642 # Shuffle things around if there's history.
C:\ProgramData\Anaconda3\lib\site-packages\requests\sessions.py in <listcomp>(.0)
638
639 # Resolve redirects if allowed.
--> 640 history = [resp for resp in gen] if allow_redirects else []
641
642 # Shuffle things around if there's history.
C:\ProgramData\Anaconda3\lib\site-packages\requests\sessions.py in resolve_redirects(self, resp, req, stream, timeout, verify, cert, proxies, yield_requests, **adapter_kwargs)
216 proxies=proxies,
217 allow_redirects=False,
--> 218 **adapter_kwargs
219 )
220
C:\ProgramData\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
616
617 # Send the request
--> 618 r = adapter.send(request, **kwargs)
619
620 # Total elapsed time of the request (approximately)
C:\ProgramData\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
506 raise SSLError(e, request=request)
507
--> 508 raise ConnectionError(e, request=request)
509
510 except ClosedPoolError as e:
ConnectionError: HTTPSConnectionPool(host='www.imdb.com', port=443): Max retries exceeded with url: /title/tt8577370/reviews?spoiler=hide&sort=helpfulnessScore&dir=desc&ratingFilter=0 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x08F8D990>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))
有时会出现此错误
ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))