我是Scrapy的新手,正在尝试抓取一个页面并获取其中各商品的价格。问题是Scrapy返回的价格顺序与商品不对应,我不知道为什么。
这是我的简单代码
import scrapy
from ..items import AmazonItem
from scrapy.http import Request
import time
class QuotesSpider(scrapy.Spider):
    """Print rank, name, price text and product id for a best-sellers page.

    The spider requests the listing URL(s) in ``start_requests`` and, for
    each ``<li>`` of the ``zg-ordered-list`` element, prints the values
    extracted relative to that ``<li>``.
    """

    name = "main"

    def start_requests(self):
        """Yield one request per listing URL, handled by ``parse``."""
        urls = [
            'https://www.amazon.com/best-sellers-movies-TV-DVD-Blu-ray/zgbs/movies-tv/ref=zg_bs_nav_0',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Print name, price text and product id of up to 50 list entries.

        Args:
            response: the downloaded listing page.

        Every value is selected relative to a single ``<li>``, so a name,
        its price text and its id always come from the same entry.
        """
        # amazon = AmazonItem()
        entries = response.xpath('//ol[@id="zg-ordered-list"]/li')
        # Slice instead of indexing with range(0, 50): a page holding fewer
        # than 50 entries must not raise IndexError.
        for rank, entry in enumerate(entries[:50], start=1):
            # Kept as a list: one entry may carry zero or several price nodes.
            price = entry.css(".p13n-sc-price::text").getall()
            raw_name = entry.xpath("span/div/span/a/div/text()").get()
            # .get() returns None when the node is absent; don't call
            # .strip() on it.
            item_name = raw_name.strip() if raw_name is not None else None
            href = entry.xpath("span/div/span/a/@href").get()
            link = self._product_id(href)
            print("({}) {} , PRICE: {}".format(rank, item_name, price))
            print(link + "\n")

    @staticmethod
    def _product_id(href):
        """Return the 4th '/'-separated segment of *href* without its query.

        Returns an empty string when *href* is missing or has fewer than
        four segments, instead of raising.
        """
        if not href:
            return ""
        segments = href.split('/')
        if len(segments) < 4:
            return ""
        return segments[3].split('?')[0]
名称和ID的顺序正确,但价格不正确。
谢谢,伙计们
答案 0 :(得分:1)
您的做法不对。
您应该逐个遍历每个商品条目,并在该条目内部提取各个字段:
def parse(self, response):
    """Print the name and price of every entry in the ordered list.

    Args:
        response: the downloaded listing page.

    Each field is selected relative to the current ``<li>``, so the name,
    price and link always belong to the same entry. A field that is
    missing from an entry comes back as ``None`` from ``.get()``.
    """
    for item in response.xpath('//ol[@id="zg-ordered-list"]/li'):
        price = item.css(".p13n-sc-price::text").get()
        item_name = item.css(".p13n-sc-truncate.p13n-sc-line-clamp-1::text").get()
        # Resolve the relative href against the page URL; computed but not
        # printed, as in the original snippet.
        link = response.urljoin(item.css(".a-link-normal::attr(href)").get())
        print("{} , PRICE: {}".format(item_name, price))