我写了一个程序,用来抓取一个简单的维基百科页面。我参考的是《Web Scraping with Python》这本书。
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Fetch the Kevin Bacon article and parse it with the built-in HTML parser.
html = urlopen("http://en.wikipedia.org/wiki/Kevin_Bacon")
bsObj = BeautifulSoup(html, 'html.parser')

# Print the target of every anchor tag; anchors without an href are skipped.
for link in bsObj.findAll("a"):
    if 'href' in link.attrs:
        print(link.attrs['href'])
但运行时会产生 SSL 错误:
Traceback (most recent call last):
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1240, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1083, in request
self._send_request(method, url, body, headers)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1128, in _send_request
self.endheaders(body)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1079, in endheaders
self._send_output(message_body)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 911, in _send_output
self.send(msg)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 854, in send
self.connect()
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1237, in connect
server_hostname=server_hostname)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/ssl.py", line 376, in wrap_socket
_context=self)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/ssl.py", line 747, in __init__
self.do_handshake()
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/ssl.py", line 983, in do_handshake
self._sslobj.do_handshake()
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/ssl.py", line 628, in do_handshake
self._sslobj.do_handshake()
ssl.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:646)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 6, in <module>
html = urlopen("http://en.wikipedia.org/wiki/Kevin_Bacon")
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 471, in open
response = meth(req, response)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 581, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 503, in error
result = self._call_chain(*args)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 443, in _call_chain
result = func(*args)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 686, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 465, in open
response = self._open(req, data)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 483, in _open
'_open', req)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 443, in _call_chain
result = func(*args)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1283, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/usr/local/Cellar/python3/3.5.0/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1242, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:646)>
Python版本是3.5.0。
书中并没有提到如何处理这种错误。我是否必须使用 pip 安装与 SSL 相关的软件包?
答案 0 :(得分:0)
当服务器提供的 SSL 证书上列出的域名与浏览器所连接的域名不匹配时,就会发生 SSL 错误。要建立 HTTPS 会话,证书上的域名必须与浏览器地址栏中的域名完全一致。
这意味着您尝试使用 urllib.request.urlopen(url) 打开的网址没有返回正确的 HTTP 响应。现在,您可以尝试以下方法之一:
使用 urllib.request.ProxyHandler() 检查您的 HTTP/HTTPS 代理处理程序。
如果您确信该网址是安全的,并且不需要对其进行 SSL 证书验证,可以尝试绕过 HTTPS 的证书验证。