我想在 PyCharm 中测试与某个网站的连接,但运行下面的代码时出现了 HTTP 403 错误(完整的回溯信息附在代码之后)。
我使用的是 BeautifulSoup 模块:
import urllib.request
from bs4 import BeautifulSoup
def get_html(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: URL string (or ``urllib.request.Request``) to open.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.HTTPError: If the server answers with an error status
            such as 403 Forbidden.
        urllib.error.URLError: If the connection cannot be established.
    """
    # The context manager closes the connection even if read() raises,
    # instead of leaving it open until the response is garbage-collected.
    with urllib.request.urlopen(url) as response:
        return response.read()
def parse(html):
    """Parse *html* into a BeautifulSoup tree and return it.

    Args:
        html: Markup to parse, as ``str`` or ``bytes``.

    Returns:
        The ``BeautifulSoup`` object built from *html*.
    """
    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed (and warns), so results can differ per machine.
    return BeautifulSoup(html, 'html.parser')
def main():
    """Download the tap.az front page and print what get_html returns."""
    page_bytes = get_html('https://tap.az/')
    print(page_bytes)


# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
C:\Users\Adil\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/Adil/Desktop/PAbot/pabot.py
Traceback (most recent call last):
File "C:/Users/Adil/Desktop/PAbot/pabot.py", line 21, in <module>
main()
File "C:/Users/Adil/Desktop/PAbot/pabot.py", line 17, in main
print(get_html('https://tap.az/'))
File "C:/Users/Adil/Desktop/PAbot/pabot.py", line 8, in get_html
response = urllib.request.urlopen(url)
File "C:\Users\Adil\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Adil\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\Adil\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\Adil\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\Adil\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\Adil\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
答案 0 :(得分:1)
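尝试在请求头中添加 User-Agent(用户代理)。urllib 默认发送的 User-Agent 是 "Python-urllib/x.y",不少网站会拒绝这类请求并返回 403;换成类似浏览器的 User-Agent 后通常即可正常访问。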
from bs4 import BeautifulSoup
import urllib.request
# Send a browser-like User-Agent instead of urllib's default
# "Python-urllib/x.y", which the site presumably rejects with 403 Forbidden.
user_agent = 'Mozilla/5.0'
headers = {'User-Agent': user_agent}
request = urllib.request.Request(url='https://tap.az/', headers=headers)

# Read the body inside a context manager so the connection is closed
# as soon as the page has been downloaded, even if parsing raises.
with urllib.request.urlopen(request) as response:
    result = BeautifulSoup(response.read(), 'html.parser')

print(result)