我想在不抓取起始页面的情况下跳转到下一页,并从下一页开始抓取数据。
这是我的代码。
import scrapy
import re
class ProductSpider(scrapy.Spider):
    """Spider that skips the start page and scrapes seller/price pairs.

    The start URL is fetched only to discover the link to the next page; no
    items are produced from it. Scraping begins on the page that link points
    to and continues through the pagination links found there.
    """

    name = 'product'
    # Scrapy reads ``start_urls`` (plural); a ``start_url`` attribute is ignored.
    start_urls = ['https://www.google.nl/search?client=opera&biw=1880&bih=1008&output=search&tbm=shop&q=738678181690&oq=738678181690&gs_l=products-cc.12...0.0.0.2438282.0.0.0.0.0.0.0.0..0.0....0...1ac..64.products-cc..0.0.0....0.65SJMExjNxE#spd=0']

    def start_requests(self):
        """Request every start URL, routing the response to ``parse_landing``.

        Overridden so that the start page is NOT handed to ``parse`` (the
        scraping callback), which is what Scrapy would do by default.
        """
        for url in self.start_urls:
            yield scrapy.Request(url=url, callback=self.parse_landing)

    def parse_landing(self, response):
        """Follow the first ``.MCpGKc > a`` link without scraping this page.

        Yields a single request for the linked page (handled by ``parse``),
        or nothing when the link is not present in the response.
        """
        # A ``response`` object only exists inside a callback, so the link
        # lookup has to happen here rather than in the class body.
        next_page_url = response.css('.MCpGKc > a::attr(href)').extract_first()
        if not next_page_url:
            self.logger.warning('No link to follow on start page: %s', response.url)
            return
        yield scrapy.Request(url=response.urljoin(next_page_url), callback=self.parse)

    # Scraping starts here; the start-up code above only navigates to this page.
    def parse(self, response):
        """Yield one item per seller/price pair, then follow pagination.

        Each yielded item is a dict with the keys ``'all_sellers'`` (seller
        name text) and ``'all_prices'`` (price text matching ``\\d+,\\d{1,2}``).
        """
        self.log('Bla Bla Bla:' + response.url)
        for product in response.css(".os-main-table"):
            all_sellers = product.css(".os-seller-name-primary > a::text").extract()
            all_prices = product.css("td.os-total-col::text").re(r"\d+\,\d{1,2}")
            # zip() pairs the n-th seller with the n-th price and stops at the
            # shorter list, so an unmatched trailing entry is dropped.
            for seller, price in zip(all_sellers, all_prices):
                yield {
                    'all_sellers': seller,
                    'all_prices': price,
                }

        next_page_url = response.css('.pag-prev-next-links > a:last-child::attr(href)').extract_first()
        if next_page_url:
            next_page_url = response.urljoin(next_page_url)
            yield scrapy.Request(url=next_page_url, callback=self.parse)