以下是解析页面的回调函数代码:
def parse_search_result(self, response):
    """Parse one search-result page: schedule the next pagination page
    (if present) and a request to extract the applications on this page.

    Note on the recursion question: Scrapy handles this fine. Yielded
    Requests are put on the scheduler queue, not called recursively, so
    a callback that yields a Request back to itself is the standard
    pagination pattern and cannot blow the stack.
    """
    # Next-page href, if the pagination widget has one. Using an `if` on
    # the extracted list instead of try/except avoids the original bare
    # `except: pass`, which silently swallowed *every* error (typos,
    # AttributeError, ...), not just the expected missing-next-page case.
    next_hrefs = response.xpath(
        "//div[@class='align_center']"
        "//node()[following::span and not(@class='noborder')]/@href"
    ).extract()
    if next_hrefs:
        # Strip ASCII control characters (0x01-0x1F) from the href.
        # The original `str.translate(None, delete)` is the Python 2
        # signature and raises TypeError on Python 3; this filter works
        # on both versions with no imports.
        next_page = ''.join(
            ch for ch in str(next_hrefs[0]) if ord(ch) >= 0x20)
        next_page_url = '{0}{1}'.format(self.base_url[0], next_page)
        yield FormRequest(next_page_url, method="GET",
                          callback=self.parse_search_result)

    # response.url was already fetched to produce this response, so the
    # duplicate filter silently drops a second request for it unless
    # dont_filter=True is set — the most likely reason parse_applications
    # never ran and pagination "didn't work as expected".
    yield FormRequest(response.url, method="GET",
                      callback=self.parse_applications,
                      dont_filter=True)
我的解析函数里有一个指向自身的"递归"调用:从响应中提取下一页的 URL,并构造一个以同一函数为回调的请求,以便逐页遍历(分页)。但我没有得到预期的结果——Scrapy 能正确处理这种写法吗?(提示:Scrapy 的去重过滤器会静默丢弃已抓取过的 URL,对同一 URL 的重复请求需要加 dont_filter=True。)