我想抓取以下网站的数据:http://lookup.mbon.org/verification/Search.aspx
首先我输入职业(护理)和姓氏(A*),然后得到结果列表页面。
我无法逐页获取列表页面,下面是我用来获取下一页的代码:
def on_page(self, response):
    """Schedule parsing of the current results page and of its pager links.

    First re-requests ``response.url`` so that ``self.on_search`` parses the
    page currently shown, then posts the form once per pager link so that
    ``self.on_search`` receives each linked page as well.

    Args:
        response: the results-page response; it must contain the
            ``#datagrid_results`` table.

    Yields:
        One ``Request`` for the current page, followed by one
        ``FormRequest`` per pager link that is not skipped.
    """
    # Form fields to post back; their content comes from self.pagedata
    # (presumably the ASP.NET hidden fields -- confirm against that helper).
    forms = self.pagedata(response)

    # The pager links are read from the table row at index 41. A result set
    # without that row (or with an empty one) simply has no further pages.
    table_rows = response.css("#datagrid_results").css("tr")
    if len(table_rows) > 41:
        pages = table_rows[41].css("a::attr(href)").extract()
    else:
        pages = []

    # Always parse the page we are currently on.
    yield Request(
        url=response.url,
        callback=self.on_search,
        dont_filter=True,
        errback=self.handle_error,
    )
    if not pages:
        return

    page_continue = pages[-1]
    first_link = "javascript:__doPostBack('datagrid_results$ctl44$ctl00','')"

    for page in pages:
        # Each href has the form javascript:__doPostBack('<target>','');
        # the postback target is the text between the first pair of single
        # quotes (same value the fixed [25:53] slice used to produce).
        forms["__EVENTTARGET"] = page.partition("'")[2].partition("'")[0]

        # Skip the ctl00 link unless it is also the last link in the row.
        # NOTE(review): the original last-link branch tested
        # `page == "...ctl40..." or "...ctl41..."`, which is always true
        # because a non-empty string is truthy, so the last link was always
        # requested. That effective behaviour is kept and made explicit here.
        # The source indentation was lost; the branch is assumed to pair with
        # the `page != page_continue` test -- confirm.
        if page != page_continue and page == first_link:
            continue

        yield FormRequest(
            url=response.url,
            callback=self.on_search,
            formdata=forms,
            dont_filter=True,
        )
def on_search(self, response):
    """Follow every detail link found in the results grid of *response*.

    Args:
        response: a results-page response containing the
            ``datagrid_results`` table.

    Yields:
        One ``Request`` per link, with the href resolved against the
        response URL and ``self.on_detail_page`` as the callback.
    """
    print ("search started")
    # Hrefs of the anchors inside grid cells whose id contains
    # 'datagrid_results'. The pager-row text lookup that used to precede this
    # was never used and has been dropped.
    detail_links = response.xpath(
        "//table[@id='datagrid_results']/tr/td[contains(@id, 'datagrid_results')]/a/@href"
    ).extract()
    for href in detail_links:
        yield Request(url=response.urljoin(href), callback=self.on_detail_page)