到目前为止,这是我的scrapy代码:
import scrapy
class Tripadvisor(scrapy.Spider):
    """Spider that collects the rating breakdown of one TripAdvisor hotel page.

    For every element matching ``.ratings_chart`` in the response, one item
    is yielded with the keys ``Excellent``, ``Very Good``, ``Average``,
    ``Poor`` and ``Terrible``. Each value is the first match of the
    corresponding XPath, or ``None`` when nothing matches.
    """

    # Identifier used on the command line: ``scrapy crawl Trip_advisor``.
    name = 'Trip_advisor'

    # Scrapy builds its initial requests from ``start_urls``; an attribute
    # called ``url`` is never read by the framework, so the spider would
    # start and finish without requesting anything.
    start_urls = ['https://www.tripadvisor.com/Hotel_Review-g188644-d604560-Reviews-Sofitel_Brussels_Europe-Brussels.html']
    # Alias kept so existing code that reads ``Tripadvisor.url`` still works.
    url = start_urls

    # Item keys, listed in the order of the <li> entries the XPath indexes
    # (li[1] .. li[5]).
    _RATING_LABELS = ('Excellent', 'Very Good', 'Average', 'Poor', 'Terrible')

    # One template instead of five near-identical literals; ``{}`` receives
    # the 1-based <li> position.
    # NOTE(review): the expression starts with ``//``, so it is evaluated
    # against the whole document rather than relative to the
    # ``.ratings_chart`` node it is called on -- confirm this is intended.
    _RATING_XPATH = (
        '//*[@id="taplc_location_detail_overview_hotel_map_pins_default_0"]'
        '/div/div[2]/div[2]/div[1]/div[2]/ul/li[{}]/span[3]'
    )

    def parse(self, response):
        """Yield one dict of rating values per ``.ratings_chart`` element.

        Args:
            response: The downloaded page handed over by Scrapy.

        Yields:
            dict: Maps each rating label to the first XPath match for that
            rating, or ``None`` if the XPath matches nothing.
        """
        for data in response.css('.ratings_chart'):
            yield {
                label: data.xpath(
                    self._RATING_XPATH.format(position)
                ).extract_first()
                for position, label in enumerate(self._RATING_LABELS, start=1)
            }
每当我在项目目录中运行 `scrapy crawl Trip_advisor` 时,都会收到 `KeyError: Spider not found: Trip_advisor`。如何修复此错误?
我很困惑,因为我明确在 `Tripadvisor` 类中定义了名称 `Trip_advisor`。
编辑:命令行中的回溯错误,感谢@furas
C:\Users\dh228\AppData\Local\Programs\Python\Python36-32\tutorial>scrapy crawl Trip_advisor
2017-12-26 01:41:30 [scrapy] INFO: Scrapy 1.2.0 started (bot: tutorial)
2017-12-26 01:41:30 [scrapy] INFO: Overridden settings: {'BOT_NAME': 'tutorial', 'NEWSPIDER_MODULE': 'tutorial.spiders', 'ROBOTSTXT_OBEY': True, 'SPIDER_MODULES': ['tutorial.spiders']}
Traceback (most recent call last):
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\spiderloader.py", line 41, in load
return self._spiders[spider_name]
KeyError: 'Trip_advisor'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\runpy.py",
line 193, in _run_module_as_main
"__main__", mod_spec)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\runpy.py",
line 85, in _run_code
exec(code, run_globals)
File "C:\Users\dh228\AppData\Local\Programs\Python\Python36-
32\Scripts\scrapy.exe\__main__.py", line 9, in <module>
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\cmdline.py", line 142, in execute
_run_print_help(parser, _run_command, cmd, args, opts)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\cmdline.py", line 88, in _run_print_help
func(*a, **kw)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\cmdline.py", line 149, in _run_command
cmd.run(args, opts)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\commands\crawl.py", line 57, in run
self.crawler_process.crawl(spname, **opts.spargs)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\crawler.py", line 162, in crawl
crawler = self.create_crawler(crawler_or_spidercls)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\crawler.py", line 190, in create_crawler
return self._create_crawler(crawler_or_spidercls)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\crawler.py", line 194, in _create_crawler
spidercls = self.spider_loader.load(spidercls)
File "c:\users\dh228\appdata\local\programs\python\python36-32\lib\site-
packages\scrapy\spiderloader.py", line 43, in load
raise KeyError("Spider not found: {}".format(spider_name))
KeyError: 'Spider not found: Trip_advisor'