我正在尝试使用Python中的requests
模块登录ASP.NET网站。
在网站上手动登录时,我可以看到以下请求头以及Cookie。
请求头:
Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Accept-Encoding:gzip,deflate
Accept-Language:en-US,en;q=0.8
Cache-Control:max-age=0
Connection:keep-alive
Content-Length:810
Content-Type:application/x-www-form-urlencoded
Cookie:ASP.NET_SessionId=sfiziz55undlnz452gfc2d55; __utma=120481550.280814175.1411461613.1411461613.1411479534.2; __utmb=120481550.1.10.1411479534; __utmc=120481550; __utmz=120481550.1411461613.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)
Host:www11.davidsonsinc.com
Origin:http://www11.davidsonsinc.com
Referer:http://www11.davidsonsinc.com/Login/Login.aspx?ReturnUrl=%2fdefault.aspx
User-Agent:Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.103 Safari/537.36
表单数据:
__EVENTTARGET:
__EVENTARGUMENT:
__LASTFOCUS:
__VIEWSTATE:/wEPDwUKMTY3MDM5MDAxNQ9kFgJmD2QWAgIDD2QWAgIDD2QWAgIBD2QWBAIBD2QWAmYPZBYCAg0PEA8WAh4HQ2hlY2tlZGdkZGRkAgMPDxYCHgdWaXNpYmxlaGRkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBUBjdGwwMCRDb250ZW50UGxhY2VIb2xkZXJOYXZQYW5lJExlZnRTZWN0aW9uJFVzZXJMb2dpbiRSZW1lbWJlck1lsSFPYUYvIbQNBPs/54aHYcx6GyU=
__VIEWSTATEGENERATOR:1806D926
__EVENTVALIDATION:/wEWBQLy8oGOCwKanaixDwKPr7TsAQKu3uTtBgKs+sa/CQVDEisOu4Iw1m9stXWgAAz9TWQn
ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$UserName:Username
ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$Password:password
ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$RememberMe:on
ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$LoginButton:Log In
请求Cookie
ASP.NET_SessionId: nz452gfc2d55
响应Cookie
.ASPXAUTH: 1F5A05237A1AA18795ECA108CE6E70D48FE5CBB5B38D061E0770618F6C069ABA03604335B6209CF8198AD3E98AE934F14056F5C887A92BB099BF38D639A22BC12972DEEE91BCE0BF36239BD1728E228E0E9CA1E5146A6C69E906E177CC8FB27395CE2F56B4013535C62E821384231EF0AD632474D6EBCFCD859882DBE9D420B6A8816BE6
以下是我用来登录Python / Django网站的脚本。
import requests
# Log in to a Django-backed site: load the login page so the session holds
# the CSRF cookie, then POST the credentials together with that token.
with requests.Session() as c:
    url = 'http://www.noobmovies.com/accounts/login/?next=/'
    USERNAME = 'user name'
    PASSWORD = 'password'
    # Initial GET populates the session cookies; 'csrftoken' is read below.
    c.get(url)
    csrftoken = c.cookies['csrftoken']
    login_data = dict(
        csrfmiddlewaretoken=csrftoken,
        username=USERNAME,
        password=PASSWORD,
        next='/',
    )
    # The Referer host must match the site being posted to; the original
    # literal misspelled it as "noobmoviews".
    c.post(url, data=login_data, headers={"Referer": "http://www.noobmovies.com/"})
    page = c.get('http://www.noobmovies.com/user/profile/0/')
    # Parenthesised print works on both Python 2 and Python 3.
    print(page.status_code)
但我不知道如何登录ASP.NET网站。如何向ASP.NET网站提交(POST)数据?
答案 0 :(得分:16)
import requests
from bs4 import BeautifulSoup
# Log in to an ASP.NET WebForms page: fetch the login form, copy its hidden
# state fields, and POST them back together with the credentials.
URL = "http://www11.davidsonsinc.com/Login/Login.aspx"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36"}

username = "username"
password = "password"

s = requests.Session()
s.headers.update(headers)
r = s.get(URL)

# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so results can differ between machines.
soup = BeautifulSoup(r.content, "html.parser")

# Hidden fields read from the fetched login form and echoed back in the POST.
VIEWSTATE = soup.find(id="__VIEWSTATE")['value']
VIEWSTATEGENERATOR = soup.find(id="__VIEWSTATEGENERATOR")['value']
EVENTVALIDATION = soup.find(id="__EVENTVALIDATION")['value']

login_data = {
    "__VIEWSTATE": VIEWSTATE,
    "__VIEWSTATEGENERATOR": VIEWSTATEGENERATOR,
    "__EVENTVALIDATION": EVENTVALIDATION,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$UserName": username,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$Password": password,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$LoginButton": "Log In",
}

r = s.post(URL, data=login_data)
# Final URL after any redirects; parenthesised print works on Python 2 and 3.
print(r.url)
答案 1 :(得分:3)
我最初使用的是requests + bs4,但是在抓取一个ASPX网站时遇到了类似的问题。我找到了另一个名为robobrowser的库,它封装了requests + bs4。有了它,在与ASPX网站交互时,您不再需要手动设置"__VIEWSTATE"等字段。
from robobrowser import RoboBrowser
# Log in to an ASP.NET WebForms page with RoboBrowser, which carries the
# form's hidden fields along when the form is submitted.
# The original literal had a stray leading space before "http".
url = 'http://www11.davidsonsinc.com'
login_url = url + '/Login/Login.aspx'
username = "username"
password = "password"

browser = RoboBrowser(history=True)
# This retrieves __VIEWSTATE and friends
browser.open(login_url)

signin = browser.get_form(id='aspnetForm')
signin["ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$UserName"].value = username
signin["ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$Password"].value = password
signin["ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$LoginButton"].value = "Log In"

browser.submit_form(signin)
# Parenthesised print works on both Python 2 and Python 3.
print(browser.url)
答案 2 :(得分:0)
我认为这更干净,更通用。
import requests
from bs4 import BeautifulSoup
# Log in to an ASP.NET WebForms page: collect whichever hidden state fields
# the login form contains and POST them back together with the credentials.
url = "http://www11.davidsonsinc.com/Login/Login.aspx"
username = "username"
password = "password"

# Custom headers are deliberately not set on the session here.
session = requests.Session()
response = session.get(url)

# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so results can differ between machines.
soup = BeautifulSoup(response.content, "html.parser")

login_data = {}
# Hidden input names to look for in the fetched form.
aspnetstates = [
    '__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION',
    '__EVENTTARGET', '__EVENTARGUMENT', '__VIEWSTATEENCRYPTED',
]
for aspnetstate in aspnetstates:
    result = soup.find('input', {'name': aspnetstate})
    # Only send fields that are actually on the page.
    if result is not None:
        # .get avoids a KeyError when the input has no value attribute;
        # such a field is sent as an empty string.
        login_data[aspnetstate] = result.get('value', '')

login_data.update({
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$UserName": username,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$Password": password,
    "ctl00$ContentPlaceHolderNavPane$LeftSection$UserLogin$LoginButton": "Log In",
})

response = session.post(url, data=login_data)