我是python新手,试图通过Beautiful Soup获取要解析的网站数据。但是,每当我运行get请求时,都会出现此错误。
我下载了html文件并用它来编写处理过程,但是无法进行在线更新。我要按照以下YouTube指南进行操作:https://www.youtube.com/watch?v=ng2o98k983k
import requests
from bs4 import BeautifulSoup

# Fetch the teams page and hand the raw HTML to BeautifulSoup for parsing.
source = requests.get('https://rd2l.gg/seasons/bLikBZrRH/divisions/HJSzO_xoM/teams').text
soup = BeautifulSoup(source, 'lxml')

# Bug fix: the original `print(r.text)` referenced an undefined name `r`
# (the response text is bound to `source` above), which raises NameError.
print(source)

# NOTE(review): the TypeError in the pasted traceback ("Every item in the
# descriptions list must be an AccessDescription") is raised deep inside
# urllib3's pyOpenSSL shim during certificate parsing — it points at a stale
# `cryptography`/`pyOpenSSL` install rather than this script. Presumably
# `pip install --upgrade cryptography pyopenssl` resolves it — confirm.
runfile('C:/Users/Akono/Documents/RD2L Season 18/RD2L DB/scrape and csv.py', wdir='C:/Users/Akono/Documents/RD2L Season 18/RD2L DB')
Traceback (most recent call last):
File "<ipython-input-225-654df9a749a6>", line 1, in <module>
runfile('C:/Users/Akono/Documents/RD2L Season 18/RD2L DB/scrape and csv.py', wdir='C:/Users/Akono/Documents/RD2L Season 18/RD2L DB')
File "C:\Users\Akono\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\Akono\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Akono/Documents/RD2L Season 18/RD2L DB/scrape and csv.py", line 27, in <module>
source=requests.get('https://rd2l.gg/seasons/bLikBZrRH/divisions/HJSzO_xoM/teams').text
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Akono\Anaconda3\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connectionpool.py", line 600, in urlopen
chunked=chunked)
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connectionpool.py", line 343, in _make_request
self._validate_conn(conn)
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connectionpool.py", line 839, in _validate_conn
conn.connect()
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\connection.py", line 355, in connect
cert = self.sock.getpeercert()
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\contrib\pyopenssl.py", line 363, in getpeercert
'subjectAltName': get_subj_alt_name(x509)
File "C:\Users\Akono\Anaconda3\lib\site-packages\urllib3\contrib\pyopenssl.py", line 213, in get_subj_alt_name
ext = cert.extensions.get_extension_for_class(
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\utils.py", line 170, in inner
result = func(instance)
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\hazmat\backends\openssl\x509.py", line 127, in extensions
self._backend, self._x509
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\hazmat\backends\openssl\decode_asn1.py", line 252, in parse
value = handler(backend, ext_data)
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\hazmat\backends\openssl\decode_asn1.py", line 400, in _decode_authority_information_access
return x509.AuthorityInformationAccess(access_descriptions)
File "C:\Users\Akono\AppData\Roaming\Python\Python37\site-packages\cryptography\x509\extensions.py", line 286, in __init__
def __repr__(self):
TypeError: Every item in the descriptions list must be an AccessDescription
答案 0（得分：0）
我想你需要的可能是类似下面这样的代码：
# Fetch the page, then normalise the HTML with the helper below before parsing.
# NOTE(review): `url` and `utils` are not defined in this snippet — presumably
# the answer author has them in scope elsewhere; verify before copying.
res1= requests.get(url)
f_html = utils.format_html(res1.text)
def format_html(self, html_str):
    """
    Format an HTML page source string.

    Running the source through BeautifulSoup and ``prettify()`` normalises
    malformed markup so the output is valid for subsequent parsing.

    :param html_str: html page source string
    :return: formatted (prettified) html string
    """
    # html5lib is the most lenient stdlib-external parser bs4 supports,
    # so broken real-world markup is repaired rather than rejected.
    soup = bs4.BeautifulSoup(html_str, 'html5lib')
    f_html = soup.prettify()
    # Lazy %-style args: the (potentially very large) message string is only
    # built if DEBUG logging is actually enabled, unlike the original f-string.
    logging.debug('Formatted html::: %s', f_html)
    return f_html