Question

# Question script: fetch the form page at `url`, copy two hidden form fields
# into a POST payload together with a registration number, submit it, and
# print the text of the 'lblEngName' span from the response.
from bs4 import BeautifulSoup
import requests
url = 'https://hmbup.in/online/frmViewCandidateDetails.aspx'

# Download the form page first so its hidden fields can be read.
html = requests.get(url).text
soup = BeautifulSoup(html, 'html.parser')

# Hidden inputs looked up by element id; their values are echoed back in the POST.
VIEWSTATEGENERATOR = soup.find(id='__VIEWSTATEGENERATOR')['value']
EVENTVALIDATION = soup.find(id='__EVENTVALIDATION')['value']
# NOTE(review): the payload does not include the '__VIEWSTATE' hidden field;
# presumably the server rejects the postback without it -- confirm.
data ={
     '__VIEWSTATEGENERATOR': VIEWSTATEGENERATOR,
     '__EVENTVALIDATION': EVENTVALIDATION,
     'txtRegNo': 'H010002',  # registration number to search for
     'btnSearch': 'Search',  # name/value of the submit button
          }
# Plain requests.post (no Session), so nothing from the GET above is carried over.
r1 = requests.post(url,data=data)
soup1 = BeautifulSoup(r1.text,'html.parser')
# NOTE(review): the lookups above use the keyword `id`; `id_` here likely
# filters on an attribute literally named "id_", so this probably returns
# None -- confirm against the BeautifulSoup documentation.
name = soup1.find('span',id_='lblEngName')
# Python 2 print statement; this line is a syntax error under Python 3.
print name.text

我尝试通过发送带有表单数据（payload）的 POST 请求来抓取该网站，但没有得到任何结果。

Answer 1

您忘记在 POST 数据中包含 `__VIEWSTATE` 字段。

# Answer 1 script: replay the search form, this time including the
# '__VIEWSTATE' hidden field, and locate the result table in the response.
from bs4 import BeautifulSoup
import requests

url = 'https://hmbup.in/online/frmViewCandidateDetails.aspx'

# requests applies no timeout by default, so a stalled server would hang the
# script forever; bound every call explicitly (seconds).
TIMEOUT = 30

# One Session is used for both calls so cookies set by the GET are sent
# again with the POST.
with requests.Session() as s:
    page = s.get(url, timeout=TIMEOUT)
    # Fail here on an HTTP error instead of later with an opaque TypeError
    # when the hidden fields are missing from an error page.
    page.raise_for_status()
    html = page.text
    soup = BeautifulSoup(html, 'html.parser')

    # Hidden inputs read from the fetched form and posted back unchanged.
    VIEWSTATE = soup.find(id='__VIEWSTATE')['value']
    VIEWSTATEGENERATOR = soup.find(id='__VIEWSTATEGENERATOR')['value']
    EVENTVALIDATION = soup.find(id='__EVENTVALIDATION')['value']
    data = {
        '__VIEWSTATEGENERATOR': VIEWSTATEGENERATOR,
        '__VIEWSTATE': VIEWSTATE,
        '__EVENTVALIDATION': EVENTVALIDATION,
        'txtRegNo': 'H010002',  # registration number to search for
        'btnSearch': 'Search',  # name/value of the submit button
    }

    r1 = s.post(url, data=data, timeout=TIMEOUT)
    r1.raise_for_status()
    soup = BeautifulSoup(r1.content, 'lxml')
    # First element with class "j_table"; None when the response has no such element.
    table = soup.select_one('.j_table')

Answer 2

实现相同目的的方式略有不同。

# Answer 2 script: build the POST payload generically from every named
# <input> on the form page, override the registration number, submit, and
# print the cell texts of each row of the result table.
import requests
from bs4 import BeautifulSoup

link = 'https://hmbup.in/online/frmViewCandidateDetails.aspx'

# requests applies no timeout by default; bound every call explicitly (seconds).
TIMEOUT = 30

res = requests.get(link, timeout=TIMEOUT)
# Stop on an HTTP error rather than parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.text, 'lxml')

# Every <input> that has a name attribute goes into the payload; inputs
# without a value attribute are sent as an empty string.
payload = {item['name']: item.get('value', '') for item in soup.select('input[name]')}
payload['txtRegNo'] = 'H010002'

resp = requests.post(link, data=payload, timeout=TIMEOUT)
resp.raise_for_status()
soup_obj = BeautifulSoup(resp.text, 'lxml')

# find() returns None when the response contains no element with class
# "j_table"; report that instead of crashing with AttributeError.
table = soup_obj.find(class_='j_table')
if table is None:
    print('No element with class "j_table" found in the response')
else:
    for trs in table.find_all('tr'):
        data = [td.get_text(strip=True) for td in trs.find_all('td')]
        print(data)

尝试使用带有效载荷（payload）的 POST 请求获取数据

2 个答案: