最近我在用 urllib 的 request 模块学习 Python 3 爬虫,想实现登录 GitHub,但收到了 502 Bad Gateway 错误。我发送的 POST 请求带有请求头和 cookie,却仍然弄不清为什么会出现 502 错误。如果有人能帮忙,非常感谢。代码如下:
from urllib import request,parse
from bs4 import BeautifulSoup
from http import cookiejar
import gzip
# Login page (fetched first) and the session URL the login form is posted to.
preURL = 'https://github.com/login?return_to=%2Fexplore'
url = 'https://github.com/session'
print('-' * 30)

# Plain HTTP and HTTPS handlers, both created with debuglevel=1.
httphd = request.HTTPHandler(debuglevel=1)
httpshd = request.HTTPSHandler(debuglevel=1)

# Cookie jar plus the processor that ties it to the opener, so every request
# made through `opener` shares the same cookies.
ckobj = cookiejar.CookieJar()
ckHandler = request.HTTPCookieProcessor(cookiejar=ckobj)

# Single opener used for all requests in this script.
opener = request.build_opener(
    httphd,
    httpshd,
    ckHandler,
)
#get token
def Get_Token():
    """Fetch the login page and extract the hidden form values needed to log in.

    Opens ``preURL`` through the module-level ``opener``, prints the response
    headers and the two extracted values, and returns the values.

    Returns:
        list: ``[utf8Flag, token]`` -- the ``value`` attributes of the page
        elements whose ``name`` is ``utf8`` and ``authenticity_token``.

    Raises:
        ValueError: if either element is not present in the page.
    """
    # Read the body inside a ``with`` block so the response is always closed.
    with opener.open(preURL) as resp:
        print('-'*30)
        print('req.headers')
        print(resp.headers)
        content = resp.read().decode('utf-8')

    bs = BeautifulSoup(content, 'html5lib')

    values = []
    for field in ('utf8', 'authenticity_token'):
        tag = bs.find(attrs={'name': field})
        if tag is None:
            # find() returns None when nothing matches; fail with a clear
            # message instead of an AttributeError on ``None.get``.
            raise ValueError(
                'login page has no element named %r' % field)
        values.append(tag.get('value'))
    utf8Flag, token = values

    print('-'*30)
    print(utf8Flag)
    print(token)
    print('-'*30)
    return [utf8Flag, token]
param = Get_Token()

# Headers for the login POST.
# Content-Length is deliberately not set here: urllib fills it in from the
# encoded body, whereas a hard-coded value overrides that and can disagree
# with the real body length, leaving the server with a malformed request.
# Accept-Encoding is also not set: urllib does not decompress response
# bodies, so the script must not advertise deflate/br.
myHeader = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Host': 'github.com',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://github.com',
    'Referer': 'https://github.com/session',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}

# Form fields: the two values returned by Get_Token() plus the credentials.
postData = {
    'utf8': param[0],
    'authenticity_token': param[1],
    'login': 'xxxxxx',
    'password': 'xxxxxx',
}
# URL-encode the form and convert it to bytes, as required for a request body.
pData = parse.urlencode(postData).encode('utf-8')

# Passing ``data`` makes this a POST request.
hds = request.Request(url, headers=myHeader, data=pData)
try:
    req = opener.open(hds)
except Exception as e:
    # Top level of the script: report the failure instead of a traceback.
    print(repr(e))