Here is the complete code I wrote:
import pandas as pd
import scrapy
from scrapy.crawler import CrawlerProcess

class quote_collector(scrapy.Spider):
    name = "quote_collector_spider"

    def start_requests(self):
        main_url = 'https://www.goodreads.com/quotes?'
        urls = ['https://www.goodreads.com/quotes?']
        for i in range(98):
            url = main_url + 'page=' + str(i + 2)
            urls.append(url)
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        quote_blocks = response.xpath('//div[@class="quoteText"]')
        my_quotes = []
        for block in quote_blocks:
            quote = block.xpath('text()').extract_first().strip()
            author = block.xpath('.//span[@class="authorOrTitle"]/text()').extract_first().strip()
            ref = block.xpath('.//span/a[@class="authorOrTitle"]/text()').extract_first()
            my_quotes.append([quote, author, ref])
        quotes_df = pd.DataFrame(columns=['Quote', 'Author', 'Referrence'])
        quotes_df = pd.concat(quotes_df, pd.DataFrame(my_quotes, columns=['Quote', 'Author', 'Referrence']))

# initiate a CrawlerProcess
process = CrawlerProcess()
# tell the process which spider to use
process.crawl(quote_collector)
# start the crawling process
process.start()
Here is the error I get:
  File "C:\ProgramData\Anaconda3\lib\site-packages\scrapy\crawler.py", line 309, in start
    reactor.run(installSignalHandlers=False)  # blocking call
  File "C:\ProgramData\Anaconda3\lib\site-packages\twisted\internet\base.py", line 1282, in run
    self.startRunning(installSignalHandlers=installSignalHandlers)
  File "C:\ProgramData\Anaconda3\lib\site-packages\twisted\internet\base.py", line 1262, in startRunning
    ReactorBase.startRunning(self)
  File "C:\ProgramData\Anaconda3\lib\site-packages\twisted\internet\base.py", line 765, in startRunning
    raise error.ReactorNotRestartable()
ReactorNotRestartable

Please help me understand what this error means and what I am doing wrong here.
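For reference, ReactorNotRestartable is raised by Twisted when its reactor is asked to start a second time within the same Python process; CrawlerProcess.start() triggers it when it is called again after a crawl has already run in that interpreter (for example, when re-running a cell or script inside Jupyter or Spyder). Below is a minimal sketch of one common workaround: running each crawl in a fresh child process so every run gets its own reactor. The helper names crawl_once/run_crawl and the use of multiprocessing are illustrative assumptions, not part of the original post.

from multiprocessing import Process

from scrapy.crawler import CrawlerProcess

def crawl_once():
    # Runs inside a child process, so the reactor starts from a clean state.
    process = CrawlerProcess()
    process.crawl(quote_collector)   # quote_collector: the spider class defined above
    process.start()                  # blocks until the crawl finishes

def run_crawl():
    p = Process(target=crawl_once)
    p.start()
    p.join()

if __name__ == '__main__':
    run_crawl()   # can be called repeatedly; each call uses a new reactor

Alternatively, running the script once per invocation from the command line avoids the problem entirely, since the reactor is then only ever started once per process.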