我不得不尝试这段代码
import scrapy
from scrapy.cmdline import execute
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from metacritic.items import MetacriticItem
class MetacriticSpider(scrapy.spider):
name = "metacritic" # Name of the spider, to be used when crawling
allowed_domains = ["metacritic.com"] # Where the spider is allowed to go
start_urls = [
"http://www.metacritic.com/browse/games/title/pc?page=0"
]
def parse(self, response):
hxs = HtmlXPathSelector(response) # The XPath selector
sites = hxs.select('//li[contains(@class, "product game_product")]/div[@class="product_wrap"]')
items = []
for site in sites:
item = MetacriticItem()
item['title'] = site.select('div[@class="basic_stat product_title"]/a/text()').extract()
item['link'] = site.select('div[@class="basic_stat product_title"]/a/@href').extract()
item['cscore'] = site.select('div[@class="basic_stat product_score brief_metascore"]/div/div/span[contains(@class, "data metascore score")]/text()').extract()
item['uscore'] = site.select('div[@class="more_stats condensed_stats"]/ul/li/span[contains(@class, "data textscore textscore")]/text()').extract()
item['date'] = site.select('div[@class="more_stats condensed_stats"]/ul/li/span[@class="data"]/text()').extract()
items.append(item)
return items
我已经尝试了某种方法来修复此代码,但我总是遇到错误
/ home / kautsar / metacritic 2 / metacritic / spiders / metacritic_spider.py:3:ScrapyDeprecationWarning:模块scrapy.spider
已弃用,请改用scrapy.spiders
来自scrapy.spider导入BaseSpider
Traceback(最近一次调用最后一次):
文件“/ home / kautsar / metacritic 2 / metacritic / spiders / metacritic_spider.py”,第6行,in
MetacriticSpider类(scrapy.spider):
TypeError:调用元类库时出错
module。 init ()最多需要2个参数(给定3个)
有谁知道如何解决这个问题?
答案 0 :(得分:0)
from scrapy.spider import BaseSpider
...
class MetacriticSpider(scrapy.spider):
如您所见,scrapy.spider是一个模块名称。你的类试图从它继承。通常,您的类应该继承自另一个类 - 在这种情况下可能是BaseSpider。