第一次 POST 请求(first post call url)重定向后,我能正常收到第一个帖子调用网址;但点击第一个"选择"按钮发出第二次 POST 请求(second post call url)后,却无法获得第二个帖子网址。请帮我看看这个迷你项目。
我的代码:
import scrapy
import time
class Govreq(scrapy.Spider):
    """Spider for the Tennessee property re-assessment site (an ASP.NET
    WebForms app).

    Flow: GET the county search page -> POST the search form (parse) ->
    follow the result grid (parse_tags) -> trigger the grid's "select"
    postback for the first row (pracel_list_next) -> save the detail
    page (parse_results).
    """

    name = 'Gov-req'
    start_urls = ['http://www.assessment.cot.tn.gov/re_assessment/SelectCounty.aspx?map=true&SelectCounty=003']
    # Politeness delay between requests; replaces any need for time.sleep(),
    # which would block Scrapy's (Twisted) event loop.
    download_delay = 1.5

    def parse(self, response):
        """Submit the county search form.

        BUG FIX: form field names/values must NOT be pre-URL-encoded.
        FormRequest encodes the body itself, so keys like
        'ctl00%24MainContent%24countylist' were being double-encoded and
        the server never recognized the postback. ASP.NET field names
        contain literal '$' characters. Also '__EVENTTARGET' was
        misspelled as '_EVENTTARGET' (single underscore), so the hidden
        field the server checks was never set.
        """
        yield scrapy.FormRequest(
            'http://www.assessment.cot.tn.gov/re_assessment/SelectCounty.aspx?map=true&SelectCounty=003',
            formdata={
                '__EVENTTARGET': '',
                '__EVENTARGUMENT': '',
                '__VIEWSTATE': response.css('input#__VIEWSTATE::attr(value)').extract_first(),
                '__VIEWSTATEGENERATOR': 'C7482FC3',
                '__EVENTVALIDATION': response.css('input#__EVENTVALIDATION::attr(value)').extract_first(),
                'ctl00$MainContent$countylist': '003',
                'ctl00$MainContent$txtOwnerName': 'aa',
                'ctl00$MainContent$txtPropertyAddress': '',
                'ctl00$MainContent$txtControlMap': '',
                'ctl00$MainContent$txtGroup': '',
                'ctl00$MainContent$txtParcel': '',
                'ctl00$MainContent$txtSubdivisionName': '',
                'ctl00$MainContent$ddlClass': '99',
                'ctl00$MainContent$txtBegSaleDate': '',
                'ctl00$MainContent$txtEndingSaleDate': '',
                'ctl00$MainContent$Sort': 'Owner',
                'ctl00$MainContent$btnSearch': 'SEARCH',
            },
            callback=self.parse_tags,
        )

    def parse_tags(self, response):
        """Re-submit the result page's form to reach the parcel list."""
        print('parcel')
        yield scrapy.FormRequest.from_response(response, callback=self.pracel_list_next)

    def pracel_list_next(self, response):
        """Fire the GridView 'select' postback for the first result row.

        from_response() already copies the page's hidden fields
        (__VIEWSTATE, __EVENTVALIDATION, ...); we only override the
        event target/argument to simulate clicking the first "select"
        link. As in parse(), the values must be raw ('$', not '%24';
        'select$0', not 'select%240') and the target field is
        '__EVENTTARGET' with two underscores.
        """
        print('prarcel_list_next')
        # NOTE: removed time.sleep(5) — it blocked the whole reactor;
        # download_delay already paces requests.
        yield scrapy.FormRequest.from_response(
            response,
            formdata={
                '__EVENTTARGET': 'ctl00$MainContent$GridView1',
                '__EVENTARGUMENT': 'select$0',
                '__VIEWSTATEENCRYPTED': '',
            },
            callback=self.parse_results,
        )

    def parse_results(self, response):
        """Save the detail page to disk and yield its <title> text."""
        # Second-to-last path segment names the output file.
        filename = response.url.split("/")[-2] + '.html'
        print(filename)
        with open(filename, 'wb') as f:
            f.write(response.body)
        yield {
            'quote': response.xpath('//title//text()').extract()
        }