我正在尝试用请求废弃站点ASP.NET。第一步是登录,没关系,登录后我需要保留会话以从页面获取数据。
我通过request.cookies.RequestsCookieJar()获取cookie,在其他站点中此代码有效,但是在ASP.NET中开发的此站点中无效。我研究并找到了有关__VIEWSTATE和其他参数的信息,但我不知道。
#!/usr/bin/env python3
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import urllib3
urllib3.disable_warnings()
from time import sleep
class getData():
def __init__(self, url):
self.url = url
self.session = requests.Session()
def getSelectorRecaptcha(self):
page = requests.get(self.url, verify=False)
soup = BeautifulSoup(page.text, 'html.parser')
recaptcha = soup.find(class_='g-recaptcha')
value = recaptcha.attrs
return(value['data-sitekey'])
def solveRecaptcha(self, API_KEY):
site_key = self.getSelectorRecaptcha()
captcha_id = self.session.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}".format(API_KEY, site_key, self.url)).text.split('|')[1]
recaptcha_answer = self.session.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id)).text
while 'CAPCHA_NOT_READY' in recaptcha_answer:
sleep(5)
recaptcha_answer = self.session.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id)).text
if(recaptcha_answer == 'ERROR_WRONG_CAPTCHA_ID'):
badreport = self.session.post("https://2captcha.com/res.php?key={}&action=reportbad&id={}".format(API_KEY, captcha_id)).text
if (recaptcha_answer != 'ERROR_WRONG_CAPTCHA_ID'):
recaptcha_answer = recaptcha_answer.split('|')[1]
payload = {
'key': site_key,
'g-recaptcha-response': recaptcha_answer
}
response = self.session.post(self.url, payload, verify=False)
def doLogin(self):
login_data = {'login':'xxxxxxxx', 'password':'xxxxxxx'}
self.solveRecaptcha('xxxxxxxxxxxxxxxxxxxxxxxxxxxx')
response = self.session.post(self.url, login_data, verify=False) #login
soup = BeautifulSoup(response.content, 'html.parser')
print(soup.select_one('__VIEWSTATE'))
print(soup.select_one('__VIEWSTATEGENERATOR'))
print(soup.select_one('__EVENTARGUMENT'))
print(soup.select_one('__EVENTVALIDATION'))
#get cookies
jar = requests.cookies.RequestsCookieJar()
c = self.session.cookies.get_dict()
for i in self.session.cookies.get_dict():
jar.set(i, c[i])
self.session.cookies = jar
page = self.session.get('private url', verify=False)
soup = BeautifulSoup(page.text, 'html.parser')
print(page.status_code)
print(soup.find(id='some id'))
def main():
obj = getData('private url')
obj.doLogin()
if __name__ == "__main__":
main()
https://www.dell.com/中也会发生此问题,我无法传递URL,但是正在Dell站点中进行测试。我希望在登录后获取数据。拜托,有人可以帮我吗?