我正在尝试使用 scrapy 从 Alibaba 的 Agriculture and Growing Media 类别页面收集数据。您可以点击此处查看该页面。
我要从页面中抓取的数据是:产品名称、价格、最小订单量、公司名称、图片网址。
# -*- coding: utf-8 -*-
import scrapy
class AlibabaSpider(scrapy.Spider):
    """Spider that scrapes product listings from a single Alibaba catalog page.

    Each yielded item is a plain dict with the keys ``product_name``,
    ``price``, ``min_order``, ``company_name`` and ``prod_detail_link``.
    A field is ``None`` when its XPath matches nothing in the product node.
    """

    name = 'alibaba'
    allowed_domains = ['alibaba.com']
    start_urls = ['https://www.alibaba.com/catalog/agricultural-growing-media_cid144?spm=a2700.9161164.1.2.4a934e02VlSXiW']

    def parse(self, response):
        """Yield one item dict per product node found in ``response``.

        Args:
            response: The downloaded catalog page.

        Yields:
            dict: Extracted fields for one product node.
        """
        for product in response.xpath('.//div[contains(@class, "m-gallery-product-item-wrap")]/div/div'):
            # extract_first() returns None when the XPath matches nothing, so
            # the price must be checked before stripping; otherwise a single
            # node without a price raises AttributeError and aborts the
            # remaining items on the page.
            raw_price = product.xpath('(.//div[@class="price"]/b/text())').extract_first()
            # TODO: 'image_url' and 'response_rate' are not extracted yet; the
            # selectors for them still have to be worked out from the markup.
            yield {
                'product_name': product.xpath('.//h2/a/@title').extract_first(),
                'price': raw_price.strip() if raw_price is not None else None,
                'min_order': product.xpath('.//div[@class="min-order"]/b/text()').extract_first(),
                'company_name': product.xpath('.//div[@class="stitle util-ellipsis"]/a/@title').extract_first(),
                'prod_detail_link': product.xpath('.//div[@class="item-img-inner"]/a/@href').extract_first(),
            }