Unable to parse the HTML page after logging in to a website with Requests and BeautifulSoup

Date: 2019-05-28 21:15:52

Tags: python web-scraping beautifulsoup request

I'm stuck. I thought I had logged in to the page, but it turns out I haven't. When you log in normally, the page spins for a while and then shows the results. Below is what I have so far. Please give me some guidance on this problem based on my code. Thank you very much.

import requests
from bs4 import BeautifulSoup as bs
import pandas as pd 
import re



### login website
# 1st try
# login_url ='https://loancare.wd.lendingsvcs.com/smagent/forms/DynamicRetry.lfcc?SMQUERYDATA=-SM-TcPEnLCQOTVw0RRFsYUmRoki5pfUI7kHGaee53mt0HbsMxnytnRsQF9Lqhb4IXampw5G7tkEiMHnvKJYp0FivW6NldwFhBPklptKPJNO8VlrTnkC5RyhhrRxvkNOC18XA1l%2fO6Fo3EEvacFUSpFMmRIuG0OubNGpaP2fFF%2bC7QHN2S17xoslsnT2pB1gO23QVAEQsGbWL%2bm9F1xUJSsTH060vFBKFRbxBrA5TBCYpb%2fJPeOcnMQXpK0Uvouepvgru6%2fWUcQ7ZoUUMb1j5yGJBUhQohM8vazgYqnhyr8cDsABp3APvX5BbwFdbg8fFEuUfLimPeTJeIgJtxLYp%2fVdOTRjImfsbxnmkicCdd02MaRfsM0JSebUnTHbB1%2fJo%2bDO7P%2fHQgWVq5%2bQQ9zMFFrFzclxJYk54tRjl3DabEFZUln4yaSj60QUqJuLttmhLsPE'

# 2nd try
# login_url ='https://loancare.wd.lendingsvcs.com/smagent/forms/DynamicRetry.lfcc?SMQUERYDATA=-SM-YqDWGVYPqWY8B5OeWH3aAAimesXFQJ%2fJZl7sRyuKfOaqsAKdA%2fdfrgNl5ApC3q62OYvR98r%2fBHIytJa3QlVruC7F0wmsqUXFk4lh9whGJQFJPsLSSZE1suQW3mFFgRjDcwCPdEunMEwu%2fvEglLGa1W5z5Oq8ww53oXmtxyW2hfl3mI%2foNTbyya4DpptRTu5rLC1QthHeCjvmU8Ss5PumMI96cTCefMdmFwowyYDGKMkK76h9FIe8BJFwoBECSWPsnUOK0YEAMM4XFEh6yyaJcurlgzrnWhUOjVMw9gcBwaEqw3Y0gFyT0JZ6LpjkrG%2fNfauWm8%2fdU7R6W20WHCwUUAObQgRTxBsxooG2u37kzQ%2b7vzUcgmNwoi551t27rG1zhJbFOQAh42ZfujyTiAeuveIH5uyH0C4YK%2fn5YSh5LGQ6GaCpnZR0cj7T%2b965f8f3'

# 3rd try (the one used below)
login_url = 'https://loancare.wd.lendingsvcs.com/webdirect/WebDirect.html'

### fill in headers (defined before the first request so they can be reused)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
}

### find hidden input tags on the login form
r = requests.get(login_url, headers=headers)
soup = bs(r.text, 'html.parser')
itags = soup.find_all(name="input")
for tag in itags:
    print(tag)
print('\n')

### account info
username = 'myusername'
password = 'mypassword'

### fill in input tags
payload = {    
    "USERINPUT":username,
    "PASSWORD":password,
    "SMENC":"ISO-8859-1",
    "SMLOCALE":"US-EN",
    "UserCompany":"loancare",
    "USER": "carrie.mcgahan,o=loancare",
    "smauthreason":"0",
    "smagentname":"sKydreaDKgXaN3m7DBUPd3z8oTpsSTRHjK/Eb2ArY4ip5nyfodVHKy8fvnIR3ghL",
    "postpreservationdata":""
         }
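# (these hidden-field values appear to be copied from the <input> tags printed above;
#  SiteMinder fields of this kind may need to match what the server issued for the current session)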


### start session.
session = requests.session()

### login
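# NOTE: params= puts the payload into the URL query string;
# HTML login forms normally expect the fields in the POST body (data=payload)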
r = session.post(url=login_url, headers= headers, params=payload)

### try one link first
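# everything after the '#' is a URL fragment: browsers never send it to the server,
# so it is most likely handled client-side by the page's JavaScript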
scrape_url = 'https://loancare.wd.lendingsvcs.com/webdirect/WebDirect.html#taskListView?loanNumber=0020371076&clientId=623&providerName=loancare&systemId=prod'

### request the page we want to scrape
resp = session.get(url=scrape_url, headers=headers)
print(resp.content)

session.close()
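
For comparison, here is a minimal sketch of the same flow with every request made through one session, the hidden form fields collected fresh from the login page, and the credentials sent as form data. It assumes WebDirect.html serves (or redirects to) the login form, that its hidden inputs must be echoed back, and that the real field names match USERINPUT/PASSWORD; none of that is verified against the actual site.

import requests
from bs4 import BeautifulSoup as bs

session = requests.session()
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
}

# assumed entry page; the long SMQUERYDATA URLs above look session-specific
login_url = 'https://loancare.wd.lendingsvcs.com/webdirect/WebDirect.html'

# fetch the login page inside the session so any cookies it sets are kept
r = session.get(login_url, headers=headers)
soup = bs(r.text, 'html.parser')

# start the payload from every named input the form already carries,
# then overwrite the username/password fields
payload = {tag.get('name'): tag.get('value', '')
           for tag in soup.find_all('input')
           if tag.get('name')}
payload['USERINPUT'] = 'myusername'   # assumed field names
payload['PASSWORD'] = 'mypassword'

# POST to the form's action URL (falling back to the login page itself),
# sending the fields as form data rather than query parameters
form = soup.find('form')
post_url = requests.compat.urljoin(r.url, form['action']) if form and form.get('action') else login_url
r = session.post(post_url, headers=headers, data=payload)
print(r.status_code, r.url)   # did we land back on the DynamicRetry page?

# reuse the same, now authenticated, session for the protected page
scrape_url = 'https://loancare.wd.lendingsvcs.com/webdirect/WebDirect.html#taskListView?loanNumber=0020371076&clientId=623&providerName=loancare&systemId=prod'
page = session.get(scrape_url, headers=headers)
print(bs(page.text, 'html.parser').title)

session.close()

If the printed title is still the login or retry page, the remaining difference is probably in the SiteMinder handshake rather than in the request mechanics, and the browser's network tab would show exactly which URL and fields the real login POST uses.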

0 Answers:

No answers yet