我正在尝试从命令行同时运行多个爬虫。例如,依次运行以下命令:
>> python run_scrapy_spider_1.py
>> python run_scrapy_spider_2.py
但是,一旦我已经运行了5个蜘蛛(都在不同的域上),我尝试运行的下一个蜘蛛就会挂起,并显示以下消息:
INFO:Telnet控制台正在侦听127.0.0.1:6028
我不认为我要运行的第6个站点有问题(当运行的蜘蛛少于5个时,它可以正常工作),而是5个蜘蛛已经在运行的事实。
下面附上我的日志输出,希望能提供线索。有什么办法可以让我同时运行5个以上的蜘蛛吗?
--- Logging error ---
Traceback (most recent call last):
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 33, in emit
payload = self.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 833, in format
return fmt.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 573, in format
s = self.formatMessage(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 542, in formatMessage
return self._style.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 386, in format
return self._fmt % record.__dict__
KeyError: 'request_id'
Call stack:
File "run_spider_alexa_id.py", line 16, in <module>
SpiderTasks.run_spider_for_alexa_site_id(alexa_site_id)
File "/home/ec2-user/code/green_brick_two/pricecomparison_project/pricecomparison/scripts/spider_scripts/spider_tasks.py", line 18, in run_spider_for_alexa_site_id
process.crawl(MySpider, alexa_site_id=alexa_site_id)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 172, in crawl
return self._crawl(crawler, *args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 176, in _crawl
d = crawler.crawl(*args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 80, in crawl
self.engine = self._create_engine()
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 105, in _create_engine
return ExecutionEngine(self, lambda _: self.stop())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/core/engine.py", line 70, in __init__
self.scraper = Scraper(crawler)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/core/scraper.py", line 69, in __init__
self.spidermw = SpiderMiddlewareManager.from_crawler(crawler)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/middleware.py", line 53, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/middleware.py", line 48, in from_settings
extra={'crawler': crawler})
File "/usr/local/lib/python3.6/logging/__init__.py", line 1301, in info
self._log(INFO, msg, args, **kwargs)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1437, in _log
self.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1447, in handle
self.callHandlers(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/sentry_sdk/integrations/logging.py", line 47, in sentry_patched_callhandlers
return old_callhandlers(self, record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1509, in callHandlers
hdlr.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 858, in handle
self.emit(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 38, in emit
self.handleError(record)
Message: 'Enabled %(componentname)ss:\n%(enabledlist)s'
Arguments: {'componentname': 'spider middleware', 'enabledlist': "['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',\n 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',\n 'scrapy.spidermiddlewares.referer.RefererMiddleware',\n 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',\n 'scrapy.spidermiddlewares.depth.DepthMiddleware']"}
2019-03-03 22:34:30 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
'scrapy.spidermiddlewares.referer.RefererMiddleware',
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
'scrapy.spidermiddlewares.depth.DepthMiddleware']
--- Logging error ---
Traceback (most recent call last):
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 33, in emit
payload = self.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 833, in format
return fmt.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 573, in format
s = self.formatMessage(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 542, in formatMessage
return self._style.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 386, in format
return self._fmt % record.__dict__
KeyError: 'request_id'
Call stack:
File "run_spider_alexa_id.py", line 16, in <module>
SpiderTasks.run_spider_for_alexa_site_id(alexa_site_id)
File "/home/ec2-user/code/green_brick_two/pricecomparison_project/pricecomparison/scripts/spider_scripts/spider_tasks.py", line 18, in run_spider_for_alexa_site_id
process.crawl(MySpider, alexa_site_id=alexa_site_id)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 172, in crawl
return self._crawl(crawler, *args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 176, in _crawl
d = crawler.crawl(*args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 80, in crawl
self.engine = self._create_engine()
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 105, in _create_engine
return ExecutionEngine(self, lambda _: self.stop())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/core/engine.py", line 70, in __init__
self.scraper = Scraper(crawler)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/core/scraper.py", line 71, in __init__
self.itemproc = itemproc_cls.from_crawler(crawler)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/middleware.py", line 53, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/middleware.py", line 48, in from_settings
extra={'crawler': crawler})
File "/usr/local/lib/python3.6/logging/__init__.py", line 1301, in info
self._log(INFO, msg, args, **kwargs)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1437, in _log
self.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1447, in handle
self.callHandlers(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/sentry_sdk/integrations/logging.py", line 47, in sentry_patched_callhandlers
return old_callhandlers(self, record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1509, in callHandlers
hdlr.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 858, in handle
self.emit(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 38, in emit
self.handleError(record)
Message: 'Enabled %(componentname)ss:\n%(enabledlist)s'
Arguments: {'componentname': 'item pipeline', 'enabledlist': '[]'}
2019-03-03 22:34:30 [scrapy.middleware] INFO: Enabled item pipelines:
[]
--- Logging error ---
Traceback (most recent call last):
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 33, in emit
payload = self.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 833, in format
return fmt.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 573, in format
s = self.formatMessage(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 542, in formatMessage
return self._style.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 386, in format
return self._fmt % record.__dict__
KeyError: 'request_id'
Call stack:
File "run_spider_alexa_id.py", line 16, in <module>
SpiderTasks.run_spider_for_alexa_site_id(alexa_site_id)
File "/home/ec2-user/code/green_brick_two/pricecomparison_project/pricecomparison/scripts/spider_scripts/spider_tasks.py", line 18, in run_spider_for_alexa_site_id
process.crawl(MySpider, alexa_site_id=alexa_site_id)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 172, in crawl
return self._crawl(crawler, *args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 176, in _crawl
d = crawler.crawl(*args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 82, in crawl
yield self.engine.open_spider(self.spider, start_requests)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/core/engine.py", line 256, in open_spider
logger.info("Spider opened", extra={'spider': spider})
File "/usr/local/lib/python3.6/logging/__init__.py", line 1301, in info
self._log(INFO, msg, args, **kwargs)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1437, in _log
self.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1447, in handle
self.callHandlers(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/sentry_sdk/integrations/logging.py", line 47, in sentry_patched_callhandlers
return old_callhandlers(self, record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1509, in callHandlers
hdlr.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 858, in handle
self.emit(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 38, in emit
self.handleError(record)
Message: 'Spider opened'
Arguments: ()
2019-03-03 22:34:30 [scrapy.core.engine] INFO: Spider opened
--- Logging error ---
Traceback (most recent call last):
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 33, in emit
payload = self.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 833, in format
return fmt.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 573, in format
s = self.formatMessage(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 542, in formatMessage
return self._style.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 386, in format
return self._fmt % record.__dict__
KeyError: 'request_id'
Call stack:
File "run_spider_alexa_id.py", line 16, in <module>
SpiderTasks.run_spider_for_alexa_site_id(alexa_site_id)
File "/home/ec2-user/code/green_brick_two/pricecomparison_project/pricecomparison/scripts/spider_scripts/spider_tasks.py", line 18, in run_spider_for_alexa_site_id
process.crawl(MySpider, alexa_site_id=alexa_site_id)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 172, in crawl
return self._crawl(crawler, *args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 176, in _crawl
d = crawler.crawl(*args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 82, in crawl
yield self.engine.open_spider(self.spider, start_requests)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/core/engine.py", line 266, in open_spider
yield self.signals.send_catch_log_deferred(signals.spider_opened, spider=spider)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/signalmanager.py", line 61, in send_catch_log_deferred
return _signal.send_catch_log_deferred(signal, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/utils/signal.py", line 65, in send_catch_log_deferred
*arguments, **named)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 150, in maybeDeferred
result = f(*args, **kw)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/pydispatch/robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/extensions/logstats.py", line 35, in spider_opened
self.task.start(self.interval)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/task.py", line 194, in start
self()
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/task.py", line 239, in __call__
d = defer.maybeDeferred(self.f, *self.a, **self.kw)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 150, in maybeDeferred
result = f(*args, **kw)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/extensions/logstats.py", line 48, in log
logger.info(msg, log_args, extra={'spider': spider})
File "/usr/local/lib/python3.6/logging/__init__.py", line 1301, in info
self._log(INFO, msg, args, **kwargs)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1437, in _log
self.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1447, in handle
self.callHandlers(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/sentry_sdk/integrations/logging.py", line 47, in sentry_patched_callhandlers
return old_callhandlers(self, record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1509, in callHandlers
hdlr.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 858, in handle
self.emit(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 38, in emit
self.handleError(record)
Message: 'Crawled %(pages)d pages (at %(pagerate)d pages/min), scraped %(items)d items (at %(itemrate)d items/min)'
Arguments: {'pages': 0, 'pagerate': 0.0, 'items': 0, 'itemrate': 0.0}
2019-03-03 22:34:30 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
--- Logging error ---
Traceback (most recent call last):
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 33, in emit
payload = self.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 833, in format
return fmt.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 573, in format
s = self.formatMessage(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 542, in formatMessage
return self._style.format(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 386, in format
return self._fmt % record.__dict__
KeyError: 'request_id'
Call stack:
File "run_spider_alexa_id.py", line 16, in <module>
SpiderTasks.run_spider_for_alexa_site_id(alexa_site_id)
File "/home/ec2-user/code/green_brick_two/pricecomparison_project/pricecomparison/scripts/spider_scripts/spider_tasks.py", line 18, in run_spider_for_alexa_site_id
process.crawl(MySpider, alexa_site_id=alexa_site_id)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 172, in crawl
return self._crawl(crawler, *args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 176, in _crawl
d = crawler.crawl(*args, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/crawler.py", line 83, in crawl
yield defer.maybeDeferred(self.engine.start)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 150, in maybeDeferred
result = f(*args, **kw)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1532, in unwindGenerator
return _inlineCallbacks(None, gen, Deferred())
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 1386, in _inlineCallbacks
result = g.send(result)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/core/engine.py", line 78, in start
yield self.signals.send_catch_log_deferred(signal=signals.engine_started)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/signalmanager.py", line 61, in send_catch_log_deferred
return _signal.send_catch_log_deferred(signal, **kwargs)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/utils/signal.py", line 65, in send_catch_log_deferred
*arguments, **named)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/Twisted-18.4.0-py3.6-linux-x86_64.egg/twisted/internet/defer.py", line 150, in maybeDeferred
result = f(*args, **kw)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/pydispatch/robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/scrapy/extensions/telnet.py", line 74, in start_listening
extra={'crawler': self.crawler})
File "/usr/local/lib/python3.6/logging/__init__.py", line 1301, in info
self._log(INFO, msg, args, **kwargs)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1437, in _log
self.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1447, in handle
self.callHandlers(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/sentry_sdk/integrations/logging.py", line 47, in sentry_patched_callhandlers
return old_callhandlers(self, record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 1509, in callHandlers
hdlr.handle(record)
File "/usr/local/lib/python3.6/logging/__init__.py", line 858, in handle
self.emit(record)
File "/home/ec2-user/MYVENV/lib/python3.6/site-packages/loggly/handlers.py", line 38, in emit
self.handleError(record)
Message: 'Telnet console listening on %(host)s:%(port)d'
Arguments: {'host': '127.0.0.1', 'port': 6028}
2019-03-03 22:34:30 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6028