我正在尝试从以下网站（https://codal.ir/Search.aspx）的 ASPX 表格中提取 Excel 下载链接。但我的问题是无法提取下一页的内容。同样的内容也可以在此处找到。
import requests as requests
import re
import certifi
from pyquery import PyQuery as pq
from bs4 import BeautifulSoup
# Search page that is fetched with GET first and then POSTed back to for paging.
url = "https://codal.ir/Search.aspx"
# extract data from page
def extract_data(soup):
    """Print and return the Excel download links found on a parsed page.

    Args:
        soup: Parsed document (a ``BeautifulSoup`` object, or anything that
            offers a compatible ``select`` method).

    Returns:
        A list with the ``href`` of every anchor inside ``div.main_table``
        whose target contains ``http://excel.codal.ir/``, in document order.
        Each link is also printed, as the original script did.
    """
    print("1")  # debug trace kept so the script's output stays the same
    tables = soup.select('div.main_table')
    print(tables)  # debug dump of the matched containers
    excel_links = []
    for table in tables:
        for link in table.find_all('a'):
            # An <a> without an href yields None, and `"..." in None` raises
            # TypeError; treat a missing href as an empty string instead.
            href = link.get('href') or ''
            if "http://excel.codal.ir/" in href:
                print(href)
                excel_links.append(href)
    return excel_links
# Matches the two arguments of an ASP.NET ``__doPostBack('target','argument')``
# call as it appears in a pager link's href.
_POSTBACK_CALL = re.compile(r"__doPostBack\('([^']*)'\s*,\s*'([^']*)'\)")


def _hidden_form_fields(page):
    """Return a name -> value dict of the hidden ``<input>`` fields on *page*."""
    return {
        field["name"]: field.get("value", "")
        for field in page.find_all("input", {"type": "hidden"})
        if field.get("name")
    }


def _postback_targets(pager):
    """Return the unique (target, argument) postback pairs linked from *pager*."""
    if pager is None:
        return []
    targets = []
    for anchor in pager.find_all("a"):
        match = _POSTBACK_CALL.search(anchor.get("href") or "")
        if match and match.groups() not in targets:
            targets.append(match.groups())
    return targets


session = requests.Session()
# NOTE(review): verify=False disables TLS certificate checking. It is kept
# because the original request relied on it; prefer a proper CA bundle
# (e.g. verify=certifi.where()) if the server's certificate chain allows it.
response = session.get(url, verify=False)
soup = BeautifulSoup(response.content, "html.parser")
# Collect every hidden field, not just __VIEWSTATE: WebForms pages normally
# also expect fields such as __EVENTVALIDATION to be posted back.
form_fields = _hidden_form_fields(soup)
view_state = form_fields.get("__VIEWSTATE")
print(view_state)
# Pager container; None when the page has no pager, in which case only the
# first page is processed.
event_target = soup.find("div", {"class": "pagerWrapper"})
print(event_target)
# Assumes the pager links use the standard __doPostBack(...) hrefs and that
# the current page is not itself rendered as such a link -- TODO confirm
# against the live markup.
event_target_list = _postback_targets(event_target)
# extract data for the 1st page
extract_data(soup)
# extract data for each page linked from the first page's pager
for link, argument in event_target_list:
    print("get page {0}".format(link))
    # Each postback is issued from the first page's form state, because the
    # targets were harvested from that page.
    post_data = dict(form_fields)
    post_data['__EVENTTARGET'] = link
    post_data['__EVENTARGUMENT'] = argument
    response = session.post(url, data=post_data, verify=False)
    soup = BeautifulSoup(response.content, "html.parser")
    extract_data(soup)