我写了一个程序,目的是登录我公司的一个网站,然后抓取数据,以便更快地收集数据。这是使用请求和美丽的汤。
我可以打印出页面的html代码,但我无法通过aspx登录,然后在页面上打印html。
下面是使用的代码和我的标题和参数。任何帮助将不胜感激
if #available(iOS 8.0, *)
{
if application.respondsToSelector("isRegisteredForRemoteNotifications")
{
let types:UIUserNotificationType = ([.Alert, .Sound, .Badge])
let settings:UIUserNotificationSettings = UIUserNotificationSettings(forTypes: types, categories: nil)
application.registerUserNotificationSettings(settings)
application.registerForRemoteNotifications()
}
}
else{
let types: UIRemoteNotificationType = [.Alert, .Badge, .Sound]
application.registerForRemoteNotificationTypes(types)
}
FROM DATA
import requests
from bs4 import BeautifulSoup
URL="http://mycompanywebsiteloginpage.co.uk/Login.aspx"
headers={"User-Agent":"Mozilla/5.0 (X11; Linux x86_64; rv:44.0) Gecko/20100101 Firefox/44.0 Iceweasel/44.0.2"}
username="myusername"
password="mypassword"
s=requests.Session()
s.headers.update(headers)
r=s.get(URL)
soup=BeautifulSoup(r.content)
VIEWSTATE=soup.find(id="__VIEWSTATE")['value']
EVENTVALIDATION=soup.find(id="__EVENTVALIDATION")['value']
EVENTTARGET=soup.find(id="__EVENTTARGET")['value']
EVENTARGUEMENT=soup.find(id="__EVENTARGUMENT")['value']
login_data={"__VIEWSTATE":VIEWSTATE,
"ctl00$ContentPlaceHolder1$_tbEngineerUsername":username,
"ctl00$ContentPlaceHolder1$_tbEngineerPassword":password,
"ctl00$ContentPlaceHolder1$_tbSiteOwnerEmail":"",
"ctl00$ContentPlaceHolder1$_tbSiteOwnerPassword":"",
"ctl00$ContentPlaceHolder1$tbAdminName":username,
"ctl00$ContentPlaceHolder1$tbAdminPassword":password,
"__EVENTVALIDATION":EVENTVALIDATION,
"__EVENTTARGET":EVENTTARGET,
"--EVENTARGUEMENT":EVENTARGUEMENT}
r = s.post(URL, data=login_data)
r = requests.get("http://mycompanywebsitespageafterthelogin.co.uk/Secure/")
print (r.url)
print (r.text)
请求COOKIES
__VIEWSTATE:"DAwNEAIAAA4BBQAOAQ0QAgAADgEFAw4BDRACDwEBBm9ubG9hZAFkU2hvd1BhbmVsKCdjdGwwMF9Db250ZW50UGxhY2VIb2xkZXIxX19wbkFkbWluaXN0cmF0b3JzJywgZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoJ2FkbWluTG9naW5MaW5rJykpOwAOAQUBDgENEAIAAA4DBQEFBwULDgMNEAIMDwEBDUFsdGVybmF0ZVRleHQBDldEU0kgRGFzaGJvYXJkAAAAAA0QAgAADgIFAAUBDgINEAIPAQEEVGV4dAEEV0RTSQAAAA0QAgwPAQEHVmlzaWJsZQgAAAAADRACDwECBAABBFdEU2kAAAAAAABCX8QugS7ztoUJMfDmZ0s20ZNQfQ=="
ctl00$ContentPlaceHolder1$_tbEngineerUsername:"myusername"
ctl00$ContentPlaceHolder1$_tbEngineerPassword:"mypassword"
ctl00$ContentPlaceHolder1$_tbSiteOwnerEmail:""
ctl00$ContentPlaceHolder1$_tbSiteOwnerPassword:""
ctl00$ContentPlaceHolder1$tbAdminName:"myusername"
ctl00$ContentPlaceHolder1$tbAdminPassword:"mypassword"
__EVENTVALIDATION:"HQABAAAA/////wEAAAAAAAAADwEAAAAKAAAACBzHEFXh+HCtf3vdl8crWr6QZnmaeK7pMzThEoU2hwqJxnlkQDX2XLkLAOuKEnW/qBMtNK2cdpQgNxoGtq65"
__EVENTTARGET:"ctl00$ContentPlaceHolder1$_btAdminLogin"
__EVENTARGUMENT:""
响应标题
ASP.NET_SessionId:"11513CDDE31AF267CCD87BAB"
请求标题
Cache-Control:"private"
Connection:"Keep-Alive"
Content-Length:"123"
Content-Type:"text/html; charset=utf-8"
Date:"Thu, 28 Jul 2016 13:37:45 GMT"
Keep-Alive:"timeout=15, max=91"
Location:"/Secure/"
Server:"Apache/2.2.14 (Ubuntu)"
x-aspnet-version:"2.0.50727"
答案 0 :(得分:3)
更改行
r = requests.get("http://mycompanywebsitespageafterthelogin.co.uk/Secure/")
使用会话对象
r = s.get("http://mycompanywebsitespageafterthelogin.co.uk/Secure/")