我正在尝试创建从另一个蜘蛛继承功能的较小蜘蛛。
这里是主蜘蛛的代码:
import scrapy
import pandas as pd
import os.path
class main(scrapy.Spider):
    """Base spider for product-catalog pages.

    Subclasses are expected to supply ``name``, ``allowed_domains`` and
    ``start_urls``; the list/detail parsing logic below is shared.
    """

    allowed_domains = ['domain.com']

    def parse_product_list(self, response):
        """Yield one Request per product on the list page, then paginate.

        :param response: scrapy Response for a product-list page.
        """
        urls = response.xpath(
            "//*[contains(@class, 'col-xs-12 col-sm-6 col-lg-3')]/a/@href"
        ).getall()
        for url in urls:
            # Hrefs are protocol-relative ("//host/path"), so prepend a scheme.
            yield scrapy.Request("https:" + url, self.parse_product_view, dont_filter=True)

        # Next Page
        next_page = response.css(".pull-left .pagination a:contains('Next')::attr(href)").get()
        if next_page is not None:
            # BUG FIX: the original passed callback=self.parse, but this class
            # never defines parse() (scrapy.Spider.parse raises
            # NotImplementedError), so pagination crashed at the second page.
            yield scrapy.Request(next_page, callback=self.parse_product_list, dont_filter=True)

    def parse_product_view(self, response):
        """Scrape a single product page into one flat export dict.

        :param response: scrapy Response for a product-detail page.
        """
        # The "or ''" guards keep a missing selector (.get() -> None) from
        # raising AttributeError on .replace() or TypeError on the "in" test.
        productSku = (response.css("#products_model::text").get() or "").replace("Model #", "")
        productName = (response.css("#products_name::text").get() or "").replace(";", '') + "- " + productSku
        productDescription = (response.css("#productDescription").get() or "").replace(";", '')

        # MSRP Price: either a plain "$..." text node, or a struck-through
        # <s> element when a sale price is displayed instead.
        price_text = response.css("#products_price::text").get() or ""
        if "$" in price_text:
            productPrice = price_text.replace("$", "")
        else:
            productPrice = (response.css("#products_price s::text").get() or "").replace("$", "")

        # Special Price — empty string when the product is not on sale.
        specialPrice = (
            response.css("#products_price .productSpecialPrice::text").get() or ""
        ).replace("Sale:$", "")

        # Product Image: main image first, then the thumbnail gallery, joined
        # into a single "; "-separated string with the watermark path removed.
        image = response.css("img.img-responsive.img-fill.product-details-big-thumb::attr(src)").get()
        gallery = response.xpath("//*[@class='product-details-thumb-float']/a/@href").getall()
        gallery.insert(0, image)
        gallery = '; '.join(gallery).replace("wm.php/", "")

        # Extra Attribute: each <li data-product-feature="Key">Key: Value</li>
        # becomes {"Key": "Value"} with the redundant "Key: " prefix stripped.
        # BUG FIX: initialize to {} unconditionally — the original left the
        # name unbound (UnboundLocalError at "if productAttributes:") whenever
        # the page had no feature list.
        productAttributes = {}
        if response.xpath(".//*[@class='product-notes-feature']/li").getall():
            keys = response.xpath(".//*[@class='product-notes-feature']/li/@data-product-feature").getall()
            values = response.xpath(".//*[@class='product-notes-feature']/li/text()").getall()
            # Build a fresh dict instead of mutating the one being iterated.
            productAttributes = {
                key: value.replace(key + ": ", "")
                for key, value in zip(keys, values)
            }

        export_all = {
            "sku": productSku,
            "label": productSku,
            "name": productName,
            "description": productDescription,
            "price": productPrice,
            "special_price": specialPrice,
            "image": image,
            "media_gallery": gallery,
        }
        if productAttributes:
            export_all.update(productAttributes)
        yield export_all
现在我复制并粘贴基础蜘蛛的整个文件,然后更改名称和网址。这不是一种有效的方式。如果基础蜘蛛有变化,我必须回到所有的小蜘蛛那里去更新。
谢谢
答案 0（得分：0）
我不确定您要问什么,但是如果您将第一行更改为:
class Spider(scrapy.Spider):
    """Reusable base spider whose allowed domain is supplied at construction.

    :param url: the domain (e.g. ``'domain.com'``) this spider may crawl.
    """

    def __init__(self, url, *args, **kwargs):
        # BUG FIX: call the scrapy.Spider initializer — skipping it leaves the
        # base class's own setup (name handling, start_urls plumbing) undone.
        # Forwarding *args/**kwargs keeps the signature backward-compatible.
        super().__init__(*args, **kwargs)
        self.allowed_domains = [url]
然后你可以把这个文件命名为 spider.py，并在另一个文件中这样写：
# Import the shared base spider defined in spider.py, then instantiate it
# with the domain it is allowed to crawl (becomes allowed_domains).
from spider import Spider
spider1 = Spider('domain.com')
这就是你所追求的吗?