我正在尝试抓取此网站,首先要求我填写表单,然后转到所需的页面: http://fcainfoweb.nic.in/PMSver2/Reports/Report_Menu_web.aspx
我写过以下代码,但不知道什么是错的。请帮忙:
import scrapy
class SpidyQuotesViewStateSpider(scrapy.Spider):
name = 'spidyquotes-viewstate'
start_urls = ['http://fcainfoweb.nic.in/PMSver2/Reports/Report_Menu_web.aspx']
download_delay = 1.5
def parse(self, response):
yield scrapy.FormRequest.from_response(
response,
formdata={
'ctl00_MainContent_ToolkitScriptManager1_HiddenField':response.css('input#ctl00_MainContent_ToolkitScriptManager1_HiddenField::attr(value)').extract_first(),
'__EVENTTARGET':'ctl00$MainContent$Rbl_Rpt_type$0',
#'__EVENTARGUMENT':'',
#'__LASTFOCUS':'',
#'__VIEWSTATE':response.css('input#__VIEWSTATE::attr(value)').extract_first(),
#'__VIEWSTATEGENERATOR':response.css('input#__VIEWSTATEGENERATOR::attr(value)').extract_first(),
#'__VIEWSTATEENCRYPTED':response.css('input#__VIEWSTATEENCRYPTED::attr(value)').extract_first(),
#'__EVENTVALIDATION': response.css('input#__EVENTVALIDATION::attr(value)').extract_first(),
'ctl00$MainContent$Ddl_Rpt_type':'Retail',
'ctl00$MainContent$ddl_Language':'English',
'ctl00$MainContent$Rbl_Rpt_type':'Price+report',
},
callback=self.parse_tags,
)
def parse_tags(self, response):
yield scrapy.FormRequest.from_response(
response,
formdata={
'ctl00_MainContent_ToolkitScriptManager1_HiddenField':response.css('input#ctl00_MainContent_ToolkitScriptManager1_HiddenField::attr(value)').extract_first(),
'__EVENTTARGET':'ctl00$MainContent$Ddl_Rpt_Option0',
#'__EVENTARGUMENT':'',
#'__LASTFOCUS':'',
#'__VIEWSTATE':response.css('input#__VIEWSTATE::attr(value)').extract_first(),
#'__VIEWSTATEGENERATOR':response.css('input#__VIEWSTATEGENERATOR::attr(value)').extract_first(),
#'__VIEWSTATEENCRYPTED':response.css('input#__VIEWSTATEENCRYPTED::attr(value)').extract_first(),
#'__EVENTVALIDATION': response.css('input#__EVENTVALIDATION::attr(value)').extract_first(),
'ctl00$MainContent$Ddl_Rpt_type':'Retail',
'ctl00$MainContent$ddl_Language':'English',
'ctl00$MainContent$Rbl_Rpt_type':'Price+report',
'ctl00$MainContent$Ddl_Rpt_Option0':'Daily+Prices',
},
callback=self.parse_date,
)
def parse_date(self, response):
yield scrapy.FormRequest(
'http://fcainfoweb.nic.in/PMSver2/Reports/Report_Menu_web.aspx',
formdata={
#'ctl00_MainContent_ToolkitScriptManager1_HiddenField':response.css('input#ctl00_MainContent_ToolkitScriptManager1_HiddenField::attr(value)').extract_first(),
'__EVENTTARGET':'',
#'__EVENTARGUMENT':'',
#'__LASTFOCUS':'',
#'__VIEWSTATE':response.css('input#__VIEWSTATE::attr(value)').extract_first(),
#'__VIEWSTATEGENERATOR':response.css('input#__VIEWSTATEGENERATOR::attr(value)').extract_first(),
#'__VIEWSTATEENCRYPTED':response.css('input#__VIEWSTATEENCRYPTED::attr(value)').extract_first(),
#'__EVENTVALIDATION': response.css('input#__EVENTVALIDATION::attr(value)').extract_first(),
'ctl00$MainContent$Ddl_Rpt_type':'Retail',
'ctl00$MainContent$ddl_Language':'English',
'ctl00$MainContent$Rbl_Rpt_type':'Price+report',
'ctl00$MainContent$Ddl_Rpt_Option0':'Daily+Prices',
'ctl00$MainContent$Txt_FrmDate':'01/02/2017',
'ctl00$MainContent$btn_getdata1':'Get+Data',
},
callback=self.parse_results,
)
def parse_results(self, response):
response.css('div.Panel1')
答案 0 :(得分:0)
答案 1 :(得分:0)
您可能还需要指定标题。 请与Google检查员核实当前版本是什么,或者您可以快速获取以下内容。
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.8,ru;q=0.6',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Type': 'application/x-www-form-urlencoded',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36',
'X-Compress': '0',
}
答案 2 :(得分:0)