I'm getting this error when I start the crawl. I've searched several forums for an answer and looked through the code in scrapy/middleware.py (it's the stock Scrapy file; I haven't modified it), but I can't figure out why I'm getting the error. The scraper uses the ImagesPipeline and the S3FilesStore pipeline to store a json file and to download the images directly into a separate S3 folder. I'm using Python 3.6. Any help is appreciated. The error message and my scraper settings are below; let me know if anything else would be useful.
Traceback (most recent call last):
  File "/Users/user/anaconda/envs/python3/lib/python3.6/site-packages/twisted/internet/defer.py", line 1386, in _inlineCallbacks
    result = g.send(result)
  File "/Users/user/anaconda/envs/python3/lib/python3.6/site-packages/scrapy/crawler.py", line 77, in crawl
    self.engine = self._create_engine()
  File "/Users/user/anaconda/envs/python3/lib/python3.6/site-packages/scrapy/crawler.py", line 102, in _create_engine
    return ExecutionEngine(self, lambda _: self.stop())
  File "/Users/user/anaconda/envs/python3/lib/python3.6/site-packages/scrapy/core/engine.py", line 70, in __init__
    self.scraper = Scraper(crawler)
  File "/Users/user/anaconda/envs/python3/lib/python3.6/site-packages/scrapy/core/scraper.py", line 71, in __init__
    self.itemproc = itemproc_cls.from_crawler(crawler)
  File "/Users/user/anaconda/envs/python3/lib/python3.6/site-packages/scrapy/middleware.py", line 58, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "/Users/user/anaconda/envs/python3/lib/python3.6/site-packages/scrapy/middleware.py", line 40, in from_settings
    mw = mwcls()
TypeError: __init__() missing 1 required positional argument: 'uri'
ITEM_PIPELINES = {
    'scrapy.pipelines.files.S3FilesStore': 1,
    'scrapy.pipelines.images.ImagesPipeline': 1,
}
AWS_ACCESS_KEY_ID = 'xxxxxx'
AWS_SECRET_ACCESS_KEY = 'xxxxxx'
IMAGES_STORE = 's3 path'
FEED_URI = 's3 path'
FEED_FORMAT = 'jsonlines'
FEED_EXPORT_FIELDS = None
FEED_STORE_EMPTY = False
FEED_STORAGES = {}
FEED_STORAGES_BASE = {
    '': 'scrapy.extensions.feedexport.FileFeedStorage',
    'file': 'scrapy.extensions.feedexport.FileFeedStorage',
    'stdout': 'scrapy.extensions.feedexport.StdoutFeedStorage',
    's3': 'scrapy.extensions.feedexport.S3FeedStorage',
    'ftp': 'scrapy.extensions.feedexport.FTPFeedStorage',
}
FEED_EXPORTERS = {}
FEED_EXPORTERS_BASE = {
    'json': 'scrapy.exporters.JsonItemExporter',
    'jsonlines': 'scrapy.exporters.JsonLinesItemExporter',
    'jl': None,
    'csv': None,
    'xml': None,
    'marshal': None,
    'pickle': None,
}
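
In case it matters, the spider yields items along the lines of the sketch below. This is a minimal, hypothetical version (the real item class has more fields and a different name); image_urls and images are the default field names that ImagesPipeline reads from and writes to.

import scrapy

class MyItem(scrapy.Item):
    # ImagesPipeline takes the download URLs from 'image_urls' and stores
    # the download results (path, url, checksum) in 'images' by default.
    title = scrapy.Field()       # hypothetical data field
    image_urls = scrapy.Field()  # list of image URLs to download
    images = scrapy.Field()      # populated by ImagesPipeline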