我从网站上的某个页面开始,想在其中找到一个链接,并跟随它进入另一个页面进行抓取。但是,我为第一个页面上那个通往另一页的链接所写的 XPath 始终不起作用,我也找不出原因。我尝试过从几个不同的路径去定位这个链接,但似乎没有一个有效?
import scrapy
from urllib.parse import urljoin
from autotrader1.items import Autotrader1Item
class Autotrader1Spider(scrapy.Spider):
    """Crawl a dealer page, follow its stock link(s), and yield listing items.

    ``parse`` handles the start URL and schedules a request for every link
    found under a ``dealer__stock-header`` element.  ``parse_dir_contents1``
    handles those follow-up pages and yields one ``Autotrader1Item`` per
    (URL, description, price) triple it extracts.
    """

    name = "autotrader1"
    allowed_domains = ["autotrader.co.uk"]
    start_urls = [
        "https://www.autotrader.co.uk/dealers/greater-manchester/manchester/liberty-cars-10000889"
    ]

    def parse(self, response):
        """Yield a request for each stock-header link on the dealer page.

        Args:
            response: The response for one of ``start_urls``.

        Yields:
            scrapy.Request: One request per extracted ``href``, handled by
            ``parse_dir_contents1``.
        """
        hrefs = response.xpath('//*[@class="dealer__stock-header"]/a/@href').extract()
        for href in hrefs:
            # Resolve against the URL of the page that was actually fetched
            # instead of a hard-coded site root: this keeps the original host
            # (including ``www``) and handles path-relative hrefs correctly.
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_dir_contents1,
            )

    def parse_dir_contents1(self, response):
        """Extract listing URL, description and price from a stock page.

        Args:
            response: The response for a page requested by ``parse``.

        Yields:
            Autotrader1Item: An item with ``URL``, ``description`` and
            ``price`` fields, all whitespace-stripped strings.
        """
        for sel in response.xpath('//div[@class="dealerStock"]'):
            urls = [
                url.strip()
                for url in sel.xpath('.//a[@class="external title"]/@href ').extract()
            ]
            descriptions = [
                description.strip()
                for description in sel.xpath('.//a[@class="external title"]/text()').extract()
            ]
            prices = [
                price.strip()
                for price in sel.xpath('.//div/span[@class="deal-price"]/text()').extract()
            ]

            # NOTE(review): the three lists are extracted independently and
            # paired by position; zip() stops at the shortest one, so a
            # listing that lacks one of the fields would silently drop or
            # misalign later rows.  Confirm against the page markup.
            for url, description, price in zip(urls, descriptions, prices):
                item = Autotrader1Item()
                item['URL'] = url
                item['description'] = description
                item['price'] = price
                yield item