在第一页上，它能正常抓取 title 标签中的文本“test1”，但在第二页“test2.html”上什么都没有抓到。我的脚本：
import scrapy
from scrapy.selector import Selector
from scrapy.spider import Spider

from testscrapy1.items import Website
class DmozSpider(Spider):
    """Spider that collects the ``<title>`` text of two pages on one site.

    ``parse`` handles the start URL, emits its title item(s) and then
    schedules a request for the second page, whose response is handled by
    ``other_function`` in the same way.
    """

    name = "bill"
    # Must contain bare domain names, not URLs: an entry carrying a scheme
    # ("http://...") is not a valid domain for the offsite filter.
    allowed_domains = ["www.mywebsite.com"]
    start_urls = ["http://www.mywebsite.com/test.html"]

    def _title_items(self, response):
        """Yield one ``Website`` item per ``<head>`` element in *response*.

        Each item's ``title`` field holds the list of text nodes extracted
        from the ``<title>`` element(s) under that ``<head>``.
        """
        for head in response.xpath('//head'):
            item = Website()
            # './/' keeps the query relative to this <head>; a leading '//'
            # would restart the search from the document root.
            item['title'] = head.xpath('.//title/text()').extract()
            yield item

    def parse(self, response):
        """Emit the title item(s) of the start page, then request page two."""
        for item in self._title_items(response):
            yield item
        # Request URLs must be absolute and include the scheme; a URL
        # without "http://" is rejected when the Request is constructed.
        yield scrapy.Request(
            url="http://www.mywebsite.com/test1.html",
            callback=self.other_function,
        )

    def other_function(self, response):
        """Emit the title item(s) of the follow-up page."""
        for item in self._title_items(response):
            yield item
提前感谢，STEF
答案 0 :(得分:1)
尝试
yield scrapy.Request(url="www.mywebsite.com", callback=self.other_function)
而不是
yield scrapy.Request(url="www.mywebsite.com/test1.html", callback=self.other_function)