如何在scrapy中处理两个连续的POST请求(302重定向)?

时间:2016-09-09 12:19:56

标签: python-2.7 web-scraping scrapy

第一次POST请求(first post call url)被重定向之后,我可以拿到第一次POST请求对应的页面;但是在点击第一个“选择”按钮(second post call url)之后,我无法拿到第二次POST请求对应的页面。请帮我看看这个小项目。

我的代码:

import scrapy
import time


class Govreq(scrapy.Spider):
    """Spider that submits an ASP.NET search form and then opens the first result row.

    Flow of callbacks:

    1. ``parse``            - POSTs the search form on the start page.
    2. ``parse_tags``       - re-submits the form found on the response.
    3. ``pracel_list_next`` - simulates the ``__doPostBack`` fired by the first
       "select" link of the results grid.
    4. ``parse_results``    - saves the resulting page and yields its title.
    """

    name = 'Gov-req'
    start_urls = ['http://www.assessment.cot.tn.gov/re_assessment/SelectCounty.aspx?map=true&SelectCounty=003']
    # Scrapy waits this many seconds between requests; no manual sleeping needed.
    download_delay = 1.5

    @staticmethod
    def _hidden_field(response, field_id, default=''):
        """Return the ``value`` of ``<input id="field_id">`` or *default* if absent.

        Falling back to a string keeps ``None`` out of ``formdata``.
        """
        selector = 'input#%s::attr(value)' % field_id
        return response.css(selector).extract_first(default=default)

    def parse(self, response):
        """Submit the search form using the state tokens from the start page.

        Field names are written with a literal ``$``: ``FormRequest`` URL-encodes
        ``formdata`` itself, so pre-encoded ``%24`` would be sent as ``%2524``.
        """
        yield scrapy.FormRequest(
            self.start_urls[0],
            formdata={
                '__EVENTTARGET': '',
                '__EVENTARGUMENT': '',
                '__VIEWSTATE': self._hidden_field(response, '__VIEWSTATE'),
                # Prefer the value on the page; keep the old constant as fallback.
                '__VIEWSTATEGENERATOR': self._hidden_field(
                    response, '__VIEWSTATEGENERATOR', 'C7482FC3'),
                '__EVENTVALIDATION': self._hidden_field(response, '__EVENTVALIDATION'),
                'ctl00$MainContent$countylist': '003',
                'ctl00$MainContent$txtOwnerName': 'aa',
                'ctl00$MainContent$txtPropertyAddress': '',
                'ctl00$MainContent$txtControlMap': '',
                'ctl00$MainContent$txtGroup': '',
                'ctl00$MainContent$txtParcel': '',
                'ctl00$MainContent$txtSubdivisionName': '',
                'ctl00$MainContent$ddlClass': '99',
                'ctl00$MainContent$txtBegSaleDate': '',
                'ctl00$MainContent$txtEndingSaleDate': '',
                'ctl00$MainContent$Sort': 'Owner',
                'ctl00$MainContent$btnSearch': 'SEARCH',
            },
            callback=self.parse_tags,
        )

    def parse_tags(self, response):
        """Re-submit the form found in *response* with its pre-filled values."""
        print('parcel')
        yield scrapy.FormRequest.from_response(response, callback=self.pracel_list_next)

    def pracel_list_next(self, response):
        """Simulate the postback of the first "select" link in the results grid.

        The link is driven by JavaScript (``__doPostBack``), so the form is
        submitted with ``dont_click=True``: otherwise ``from_response`` would also
        send the first submit button, and the server would handle that instead.
        """
        print('prarcel_list_next')
        # NOTE: the former time.sleep(5) was removed - it blocks Scrapy's event
        # loop; request pacing is already handled by ``download_delay``.
        yield scrapy.FormRequest.from_response(
            response,
            formdata={
                # Unencoded values: FormRequest performs the URL-encoding.
                '__EVENTTARGET': 'ctl00$MainContent$GridView1',
                '__EVENTARGUMENT': 'select$0',
                '__VIEWSTATE': self._hidden_field(response, '__VIEWSTATE'),
                '__VIEWSTATEGENERATOR': self._hidden_field(
                    response, '__VIEWSTATEGENERATOR', 'F71013A5'),
                '__VIEWSTATEENCRYPTED': '',
                '__EVENTVALIDATION': self._hidden_field(response, '__EVENTVALIDATION'),
            },
            dont_click=True,
            callback=self.parse_results,
        )

    def parse_results(self, response):
        """Write the page body to ``<second-to-last URL segment>.html`` and yield its title."""
        filename = response.url.split("/")[-2] + '.html'
        print(filename)
        with open(filename, 'wb') as f:
            f.write(response.body)
        yield {
            'quote': response.xpath('//title//text()').extract(),
        }

0 个答案:

没有答案
相关问题