我写了一个小python脚本,这是我的python脚本。
#! /usr/bin/python3
import notify2
import requests,bs4,pprint
import time
while True:
respond=requests.get('https://cricbuzz.com')
soup=bs4.BeautifulSoup(respond.text,'html.parser')
element = soup.find_all('div', {'class': 'cb-col cb-col-25 cb-mtch-blk'})
for match in element:
if 'IND' in match.getText():
pprint.pprint(match.getText())
break
notify2.init('app name')
n = notify2.Notification("Match Score",
match.getText(),
"notification-message-im"
)
n.show()
time.sleep(int(60*2))
使用Beautiful Soup模块接收请求并解析HTML,该模块在其中运行两分钟并继续运行。它工作正常,但低于下面的值突然发生了错误,不知道为什么吗? 我的笔记本电脑的互联网连接良好。
错误:
Traceback (most recent call last):
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 343, in _make_request
self._validate_conn(conn)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 849, in _validate_conn
conn.connect()
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connection.py", line 356, in connect
ssl_context=context)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/util/ssl_.py", line 359, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "/usr/lib/python3.6/ssl.py", line 407, in wrap_socket
_context=self, _session=session)
File "/usr/lib/python3.6/ssl.py", line 814, in __init__
self.do_handshake()
File "/usr/lib/python3.6/ssl.py", line 1068, in do_handshake
self._sslobj.do_handshake()
File "/usr/lib/python3.6/ssl.py", line 689, in do_handshake
self._sslobj.do_handshake()
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/chaitu/.local/lib/python3.6/site-packages/requests/adapters.py", line 445, in send
timeout=timeout
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/util/retry.py", line 367, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/packages/six.py", line 685, in reraise
raise value.with_traceback(tb)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 343, in _make_request
self._validate_conn(conn)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 849, in _validate_conn
conn.connect()
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/connection.py", line 356, in connect
ssl_context=context)
File "/home/chaitu/.local/lib/python3.6/site-packages/urllib3/util/ssl_.py", line 359, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "/usr/lib/python3.6/ssl.py", line 407, in wrap_socket
_context=self, _session=session)
File "/usr/lib/python3.6/ssl.py", line 814, in __init__
self.do_handshake()
File "/usr/lib/python3.6/ssl.py", line 1068, in do_handshake
self._sslobj.do_handshake()
File "/usr/lib/python3.6/ssl.py", line 689, in do_handshake
self._sslobj.do_handshake()
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "./score.py", line 10, in <module>
respond=requests.get('https://cricbuzz.com')
File "/home/chaitu/.local/lib/python3.6/site-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/home/chaitu/.local/lib/python3.6/site-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/home/chaitu/.local/lib/python3.6/site-packages/requests/sessions.py", line 512, in request
resp = self.send(prep, **send_kwargs)
File "/home/chaitu/.local/lib/python3.6/site-packages/requests/sessions.py", line 622, in send
r = adapter.send(request, **kwargs)
File "/home/chaitu/.local/lib/python3.6/site-packages/requests/adapters.py", line 495, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
答案 0 :(得分:0)
我用这个功能来抓取 Youtube 的喜欢和不喜欢。它提供了对 241 个链接的访问,它们会再次访问并从我离开的地方抓取更多。尝试/排除和延长睡眠时间,您将能够比现在刮得更多。此外,如果您想解决这个问题,您可以使用某些代理或 VPN 来不断更改您的 IP,这样请求就不会来自同一个 IP 地址。
def scrape_info(url):
result = {}
session = HTMLSession()
try:
response = session.get(url)
# execute Java-script
response.html.render(sleep=40)
# create bs object to parse HTML
soup = bs(response.html.html, "html.parser")
# number of likes
x = soup.find_all("yt-formatted-string", {"id": "text", "class": "ytd-toggle-button-renderer"})
if x is not None and len(x) > 0:
result["likes"] = x[0].text
result["dislikes"] = x[1].text
else:
result["likes"] = None
result["dislikes"] = None
except requests.exceptions.ConnectionError:
print("Connection refused")
result["likes"] = None
result["dislikes"] = None
time.sleep(30)
return result