我是python新手,试图通过Beautiful Soup获取要解析的网站数据。但是,每当我运行get请求时,都会出现此错误。
我下载了html文件并用它来编写处理过程,但是无法进行在线更新。我要按照以下YouTube指南进行操作:https://www.youtube.com/watch?v=ng2o98k983k
import requests
from bs4 import BeautifulSoup

# Fetch the teams page and hand the raw HTML to BeautifulSoup for parsing.
source = requests.get('https://rd2l.gg/seasons/bLikBZrRH/divisions/HJSzO_xoM/teams').text
soup = BeautifulSoup(source, 'lxml')

# Bug fix: the original `print(r.text)` referenced an undefined name `r`
# (the response text is bound to `source` above), which raises NameError.
print(source)

# NOTE(review): the TypeError in the pasted traceback ("Every item in the
# descriptions list must be an AccessDescription") is raised deep inside
# urllib3's pyOpenSSL shim during certificate parsing — it points at a stale
# `cryptography`/`pyOpenSSL` install rather than this script. Presumably
# `pip install --upgrade cryptography pyopenssl` resolves it — confirm.
runfile('C:/Users/Akono/Documents/RD2L Season 18/RD2L DB/scrape and csv.py', wdir='C:/Users/Akono/Documents/RD2L Season 18/RD2L DB')
Traceback (most recent call last):
File "<ipython-input-225-654df9a749a6>", line 1, in <module>
runfile('C:/Users/Akono/Documents/RD2L Season 18/RD2L DB/scrape and csv.py', wdir='C:/Users/Akono/Documents/RD2L Season 18/RD2L DB')
File "C:\Users\Akono\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\Akono\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Akono/Documents/RD2L Season 18/RD2L DB/scrape and csv.py", line 27, in <module>
source=requests.get('https://rd2l.gg/seasons/bLikBZrRH/divisions/HJSzO_xoM/teams').text
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connectionpool.py", line 600, in urlopen
chunked=chunked)
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connectionpool.py", line 343, in _make_request
self._validate_conn(conn)
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connectionpool.py", line 839, in _validate_conn
conn.connect()
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connection.py", line 355, in connect
cert = self.sock.getpeercert()
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\contrib\pyopenssl.py", line 363, in getpeercert
'subjectAltName': get_subj_alt_name(x509)
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\contrib\pyopenssl.py", line 213, in get_subj_alt_name
ext = cert.extensions.get_extension_for_class(
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\utils.py", line 170, in inner
result = func(instance)
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\hazmat\backends\openssl\x509.py", line 127, in extensions
self._backend, self._x509
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\hazmat\backends\openssl\decode_asn1.py", line 252, in parse
value = handler(backend, ext_data)
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\hazmat\backends\openssl\decode_asn1.py", line 400, in _decode_authority_information_access
return x509.AuthorityInformationAccess(access_descriptions)
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\x509\extensions.py", line 286, in __init__
def __repr__(self):
TypeError: Every item in the descriptions list must be an AccessDescription
答案 0（得分：0）
我想你需要的可能是类似下面这样的代码：
# Fetch the page, then normalise the HTML with the helper below before parsing.
# NOTE(review): `url` and `utils` are not defined in this snippet — presumably
# the answer author has them in scope elsewhere; verify before copying.
res1= requests.get(url)
f_html = utils.format_html(res1.text)
def format_html(self, html_str):
    """
    Format an HTML page source string.

    Running the source through BeautifulSoup and ``prettify()`` normalises
    malformed markup so the output is valid for subsequent parsing.

    :param html_str: html page source string
    :return: formatted (prettified) html string
    """
    # html5lib is the most lenient stdlib-external parser bs4 supports,
    # so broken real-world markup is repaired rather than rejected.
    soup = bs4.BeautifulSoup(html_str, 'html5lib')
    f_html = soup.prettify()
    # Lazy %-style args: the (potentially very large) message string is only
    # built if DEBUG logging is actually enabled, unlike the original f-string.
    logging.debug('Formatted html::: %s', f_html)
    return f_html