我一直在寻找解决方案，但找到的方法对我的情况都不适用。调试了两天之后，我决定向大家求助。
URL 看起来没有问题。即使我在发出请求的代码之前硬编码了一个 URL，回调函数仍然没有被调用。
我的代码是:
def parse_link(self, response):
    """Print the URL, body length and body of a followed link's response.

    This is a debugging callback: it only writes to stdout and returns
    nothing.

    Args:
        response: Object exposing ``url`` and ``body`` attributes.
    """
    # A single pre-formatted operand keeps the output text identical to the
    # original Python 2 ``print a, b`` statements while also being valid
    # syntax on Python 3.
    print('lllll %s' % response.url)
    body = response.body
    print('bbbbb %d %s' % (len(body), body))
def parse(self, response):
    """Yield a ``parse_link`` request for each table's "full quotes" link.

    The number of ``id`` attributes matched by ``//a//@id`` decides how many
    tables are probed; probe ``i`` queries ``table[9 + i]`` for anchors whose
    text contains "full quotes". Probes that find no href are skipped.

    Args:
        response: Response object for the page being parsed.

    Yields:
        scrapy.Request: One request per probed table that has a matching
        href, with ``self.parse_link`` as the callback.
    """
    hxs = HtmlXPathSelector(response)
    issues = hxs.select('//a//@id').extract()
    for offset in range(len(issues)):
        links = hxs.select(
            '//html//body//table[%d+%d]/tr/td//a[contains(text(),"full quotes")]/@href'
            % (9, offset)
        ).extract()
        # NOTE(review): the original also ran ``if len(bb) < 1: continue``,
        # but ``bb`` is never bound in this method, so the first iteration
        # raised NameError before any request could be yielded. The guard is
        # dropped here; restore it if ``bb`` is meant to be a real value.
        if not links:
            continue
        # Use the first match in document order. The original de-duplicated
        # through ``set`` first, which made the chosen element arbitrary
        # whenever a table held more than one distinct link.
        yield scrapy.Request(url=links[0], callback=self.parse_link)
答案 0 :(得分:0)
试试这个:
def parse(self, response):
    """Yield a ``parse_link`` request for each table's "full quotes" link.

    The number of ``id`` attributes matched by ``//a//@id`` decides how many
    tables are probed; probe ``i`` queries ``table[9 + i]`` for anchors whose
    text contains "full quotes". Probes that find no href are skipped.

    Args:
        response: Response object for the page being parsed.

    Yields:
        scrapy.Request: One request per probed table that has a matching
        href, with ``self.parse_link`` as the callback.
    """
    hxs = HtmlXPathSelector(response)
    issues = hxs.select('//a//@id').extract()
    for offset in range(len(issues)):
        links = hxs.select(
            '//html//body//table[%d+%d]/tr/td//a[contains(text(),"full quotes")]/@href'
            % (9, offset)
        ).extract()
        # NOTE(review): the original also ran ``if len(bb) < 1: continue``,
        # but ``bb`` is never bound in this method, so the first iteration
        # raised NameError before any request could be yielded. The guard is
        # dropped here; restore it if ``bb`` is meant to be a real value.
        if not links:
            continue
        # Use the first match in document order. The original de-duplicated
        # through ``set`` first, which made the chosen element arbitrary
        # whenever a table held more than one distinct link.
        # ``str()`` is kept from the original; presumably it is there to hand
        # Scrapy a native string rather than the extracted value as-is.
        yield scrapy.Request(str(links[0]), self.parse_link)
def parse_link(self, response):
    """Print the URL, body length and body of a followed link's response.

    This is a debugging callback: it only writes to stdout and returns
    nothing.

    Args:
        response: Object exposing ``url`` and ``body`` attributes.
    """
    # A single pre-formatted operand keeps the output text identical to the
    # original Python 2 ``print a, b`` statements while also being valid
    # syntax on Python 3.
    print('lllll %s' % response.url)
    body = response.body
    print('bbbbb %d %s' % (len(body), body))