python3 502 Bad Gateway

Time: 2018-06-07 12:35:22

Tags: python web-crawler

I have recently been learning Python 3 web crawling with the urllib request module. What I am trying to do is log in to GitHub, but I keep getting a 502 Bad Gateway error. I send the POST with the headers and cookies set, yet I cannot figure out why the 502 occurs. I would really appreciate it if someone could help. Here is the code:

from urllib import request,parse
from bs4 import BeautifulSoup
from http import cookiejar
import gzip
preURL = 'https://github.com/login?return_to=%2Fexplore'
url = 'https://github.com/session'

print('-'*30)

#create CookieJar object
ckobj = cookiejar.CookieJar()
ckHandler = request.HTTPCookieProcessor(ckobj)
httphd = request.HTTPHandler(debuglevel = 1)
httpshd = request.HTTPSHandler(debuglevel = 1)
opener = request.build_opener(httphd, httpshd, ckHandler)

#get token
def Get_Token():
    req = opener.open(preURL)
    print('-'*30)
    print('req.headers')
    print(req.headers)
    content = req.read().decode('utf-8')
    bs = BeautifulSoup(content,'html5lib')
    # utf8Flag = bs.find(attrs={'name':'utf8'}).attrs['value']
    # token = bs.find(attrs={'name':'authenticity_token'}).attrs['value']
    # return [utf8Flag,token]
    utf8Flag = (bs.find(attrs={'name':'utf8'}).get('value'))
    token = (bs.find(attrs={'name':'authenticity_token'}).get('value'))
    # print(utf8Flag,token)
    print('-'*30)
    print(utf8Flag)
    print(token)
    print('-'*30)
    return [utf8Flag,token]


param = Get_Token()

# define the headers
myHeader = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Host': 'github.com',
    'Content-Length': '196',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://github.com',
    'Referer': 'https://github.com/session',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'}
# define the data form
postData = {
    'utf8': param[0],
    'authenticity_token': param[1],
    'login': 'xxxxxx',
    'password': 'xxxxxx'}
pData = parse.urlencode(postData).encode('utf-8')


#create request.Request object containing the headers and data form

hds = request.Request(url, headers = myHeader, data = pData)

try:
    req = opener.open(hds)
except Exception as e:
    print(repr(e))
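
For comparison, here is a minimal sketch of the same two-step login flow written with the third-party requests library (using requests instead of urllib is my assumption, not part of the original code). A requests.Session carries the cookies from the GET over to the POST and computes Content-Length itself, so none of the headers need to be hard-coded; the login and password values are placeholders.

import requests
from bs4 import BeautifulSoup

LOGIN_URL = 'https://github.com/login?return_to=%2Fexplore'
SESSION_URL = 'https://github.com/session'

# the Session object keeps cookies between requests
session = requests.Session()
session.headers.update({'User-Agent': 'Mozilla/5.0'})

# step 1: fetch the login page and read the hidden form fields
resp = session.get(LOGIN_URL)
soup = BeautifulSoup(resp.text, 'html5lib')
utf8_flag = soup.find(attrs={'name': 'utf8'}).get('value')
token = soup.find(attrs={'name': 'authenticity_token'}).get('value')

# step 2: post the credentials together with the token;
# the session re-sends the cookies it received in step 1
payload = {
    'utf8': utf8_flag,
    'authenticity_token': token,
    'login': 'xxxxxx',      # placeholder username
    'password': 'xxxxxx'}   # placeholder password
resp = session.post(SESSION_URL, data=payload)
print(resp.status_code)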

0 Answers:

No answers yet.