# Excerpt from a Scrapy callback; the enclosing `def` is not shown and the
# original indentation was lost. NOTE(review): the statements down to the
# `yield` presumably form the loop body -- confirm against the real source.
# Iterate over every element whose class contains "sr_item".
for hotel in response.xpath('//div[contains(@class,"sr_item")]'):
# Text nodes found under the span whose class contains "sr-hotel__name".
hotelName = hotel.xpath('.//span[contains(@class,"sr-hotel__name")]//text()')
print hotelName.extract()
# `src` attribute(s) of the image whose class contains "hotel_image".
hotel_image = hotel.xpath('.//img[contains(@class, "hotel_image")]//@src')
print hotel_image.extract()
# `href` attribute(s) of the anchor whose class contains "hotel_name_link".
hotelLink = hotel.xpath('.//a[contains(@class,"hotel_name_link")]//@href')
# Yield a request for the first matched link, joined with the current
# response URL; self.parseHotel is passed as the request's callback.
# hotelLink[0] raises IndexError when no link matched.
yield scrapy.Request(response.urljoin(hotelLink[0].extract()), self.parseHotel)
# `href` of the "paging-next" link; this excerpt does not show it being used.
next_page = response.xpath('//a[contains(@class,"paging-next")]//@href')
我的代码如上所示(原帖中以图片形式附上)。可以看到,在 for 循环中,我希望 Scrapy 先执行完回调函数 parseHotel 并返回,然后再继续执行 for 循环。
然而现在,它会先打印出所有酒店名称,也就是说 for 循环先全部执行完,然后 parseHotel 才开始 yield 结果。
一旦我开始给 item 对象赋值,这就会把我的输出弄乱。
答案 0 :(得分:0)
您想要实现的,几乎可以肯定就是 Scrapy 文档中“向回调函数传递附加数据”(Passing additional data to callback functions)一节所描述的做法。以下是针对您的情况的写法:
def parse_item(self, response):
    """Parse a search-results page and follow each hotel to its detail page.

    For every element whose class contains "sr_item", a ``HotelItem`` is
    created, filled with the extracted name and image values, and attached
    to the detail-page request through ``request.meta['item']`` so that
    ``parseHotel`` receives the same item and can complete it.

    Yields:
        One ``scrapy.Request`` per hotel that has a detail link (callback
        ``parseHotel``); the partially filled item itself for a hotel with
        no detail link; and, when a "paging-next" link exists, one request
        for the next results page (callback ``parse_item``).
    """
    for hotel in response.xpath('//div[contains(@class,"sr_item")]'):
        item = HotelItem()

        # Store the extracted strings (what gets printed), not the
        # SelectorList objects returned by xpath().
        hotel_name = hotel.xpath(
            './/span[contains(@class,"sr-hotel__name")]//text()').extract()
        print(hotel_name)
        item["hotelName"] = hotel_name

        hotel_image = hotel.xpath(
            './/img[contains(@class, "hotel_image")]//@src').extract()
        print(hotel_image)
        item["hotel_image"] = hotel_image

        hotel_link = hotel.xpath(
            './/a[contains(@class,"hotel_name_link")]//@href').extract_first()
        if hotel_link is None:
            # No detail page to visit: emit what was collected instead of
            # failing on a missing link and losing the rest of the page.
            yield item
            continue

        request = scrapy.Request(
            response.urljoin(hotel_link), callback=self.parseHotel)
        # Hand the partially filled item over to the detail-page callback.
        request.meta['item'] = item
        yield request

    # Pagination is handled once per page, after all hotels were processed.
    # extract_first() gives a single href (urljoin needs a string, not the
    # list returned by extract()) or None when there is no next page.
    next_page = response.xpath(
        '//a[contains(@class,"paging-next")]//@href').extract_first()
    if next_page is not None:
        yield scrapy.Request(
            response.urljoin(next_page), callback=self.parse_item)
def parseHotel(self, response):
    """Complete the item carried in ``response.meta['item']`` and yield it.

    The item is read from the request metadata, two extra fields are filled
    from this response, and the same item object is yielded.

    Raises:
        KeyError: if the request that led here carried no ``'item'`` entry
            in its ``meta``.
    """
    item = response.meta['item']
    # Example XPaths from the answer; replace with the real detail-page
    # expressions. extract_first() returns the first match only.
    item["extra_1"] = response.xpath('/example/text()').extract_first()
    item["extra_2"] = response.xpath('/example2/text()').extract_first()
    yield item