from bs4 import BeautifulSoup
import requests
# Question script: fetch the candidate-details form, copy two of its state
# fields into a POST payload, submit a registration number, and print the
# text of the resulting name element.
url = 'https://hmbup.in/online/frmViewCandidateDetails.aspx'
# Initial GET: the form-state values sent back below are read from this page.
html = requests.get(url).text
soup = BeautifulSoup(html, 'html.parser')
# Read the `value` attribute of the elements with these ids.
VIEWSTATEGENERATOR = soup.find(id='__VIEWSTATEGENERATOR')['value']
EVENTVALIDATION = soup.find(id='__EVENTVALIDATION')['value']
# POST payload. NOTE(review): no '__VIEWSTATE' key is included here;
# presumably the server needs it to accept the postback -- confirm.
data ={
'__VIEWSTATEGENERATOR': VIEWSTATEGENERATOR,
'__EVENTVALIDATION': EVENTVALIDATION,
# Registration number to look up.
'txtRegNo': 'H010002',
'btnSearch': 'Search',
}
# The POST uses a fresh module-level request, not a shared Session.
r1 = requests.post(url,data=data)
soup1 = BeautifulSoup(r1.text,'html.parser')
# NOTE(review): the keyword used on the earlier lookups is `id=`; `id_=`
# presumably filters on an attribute literally named "id_" and would
# return None -- confirm against the bs4 documentation.
name = soup1.find('span',id_='lblEngName')
# Python 2 print statement; this line is a syntax error under Python 3.
print name.text
我试图通过发送带有效载荷(payload)的 POST 请求来抓取该网站,但没有得到任何结果。
答案 0 :(得分:1)
您忘记在 POST 数据中包含 `__VIEWSTATE` 字段:
from bs4 import BeautifulSoup
import requests
# Answer script: same lookup as the question, but the POST payload also
# carries '__VIEWSTATE', and both requests go through one Session.
url = 'https://hmbup.in/online/frmViewCandidateDetails.aspx'

# A Session persists cookies between the GET and the POST.
# NOTE(review): whether this site actually requires that is not visible
# here -- confirm.
with requests.Session() as s:
    # Initial GET: the form-state values posted back below come from this page.
    html = s.get(url).text
    soup = BeautifulSoup(html, 'html.parser')

    # Read the `value` attribute of each form-state element by id.
    VIEWSTATE = soup.find(id='__VIEWSTATE')['value']
    VIEWSTATEGENERATOR = soup.find(id='__VIEWSTATEGENERATOR')['value']
    EVENTVALIDATION = soup.find(id='__EVENTVALIDATION')['value']

    data = {
        '__VIEWSTATEGENERATOR': VIEWSTATEGENERATOR,
        '__VIEWSTATE': VIEWSTATE,
        '__EVENTVALIDATION': EVENTVALIDATION,
        # Registration number to look up.
        'txtRegNo': 'H010002',
        'btnSearch': 'Search',
    }

    # Submit the search form within the same session.
    r1 = s.post(url, data=data)

    # Re-parse the response and select the first element with class
    # "j_table"; `table` is None when no such element exists.
    soup = BeautifulSoup(r1.content, 'lxml')
    table = soup.select_one('.j_table')
答案 1 :(得分:1)
下面是实现相同目的的另一种略有不同的写法。
import requests
from bs4 import BeautifulSoup
# Alternative script: build the POST payload from every named <input> on
# the page instead of listing the form-state fields one by one.
link = 'https://hmbup.in/online/frmViewCandidateDetails.aspx'

# Initial GET to obtain the form and its current field values.
res = requests.get(link)
soup = BeautifulSoup(res.text, 'lxml')

# Map each <input name=...> to its `value` attribute ('' when absent).
# NOTE(review): presumably this picks up __VIEWSTATE, __EVENTVALIDATION and
# the search button automatically -- confirm against the live page.
payload = {
    item['name']: item.get('value', '')
    for item in soup.select('input[name]')
}
# Fill in the registration number to look up.
payload['txtRegNo'] = 'H010002'

resp = requests.post(link, data=payload)
soup_obj = BeautifulSoup(resp.text, 'lxml')

# Print the stripped cell texts of each row of the first element with class
# "j_table". If no such element exists, find() returns None and the
# chained .find_all() raises AttributeError.
for trs in soup_obj.find(class_='j_table').find_all('tr'):
    data = [td.get_text(strip=True) for td in trs.find_all('td')]
    print(data)