如何在 Scrapy 扩展(EXTENSIONS)中再次调用 spider 的 start_requests

时间:2019-03-19 09:19:14

标签: python-3.x scrapy

class TimCustomStatsExtension(object):
    """Scrapy extension that re-issues the spider's start requests on a schedule.

    While the spider is open, a ``task.LoopingCall`` runs :meth:`collect`
    every ``self.time`` seconds. ``collect`` reads the Redis key ``CRAWLDAY``
    and, when its value equals the current local time formatted as ``HH:MM``,
    deletes the ``<spider.name>:dupefilter`` key and hands every request
    produced by ``spider.start_requests()`` to the crawler's engine.
    """

    def __init__(self, stats, crawler):
        """Store the collaborators and open the Redis connection.

        Args:
            stats: The crawler's stats collector (``crawler.stats``).
            crawler: The crawler this extension is attached to.
        """
        self.stats = stats
        self.crawler = crawler
        self.time = 5  # polling interval in seconds, passed to LoopingCall.start
        self.server = Redis()
        # Populated in spider_opened; initialised here so spider_closed and
        # collect never hit an AttributeError.
        self.spider = None
        self.tsk = None
        # "YYYY-mm-dd HH:MM" of the last trigger, so a matching minute fires once.
        self._last_trigger = None

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the extension and subscribe it to spider open/close signals."""
        instance = cls(crawler.stats, crawler)
        crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signal=signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Remember the spider and start polling immediately (``now=True``)."""
        self.spider = spider
        self.tsk = task.LoopingCall(self.collect)
        self.tsk.start(self.time, now=True)

    def spider_closed(self, spider):
        """Stop the polling loop if it was started and is still running."""
        tsk = getattr(self, "tsk", None)
        if tsk is not None and tsk.running:
            tsk.stop()

    @staticmethod
    def _matches_schedule(raw_value, current_hhmm):
        """Return True when the stored schedule value equals ``current_hhmm``.

        Args:
            raw_value: Value read from Redis; ``None`` when the key is missing,
                ``bytes`` when the client does not decode responses, or ``str``.
            current_hhmm: Current time already formatted as ``HH:MM``.
        """
        if raw_value is None:
            return False
        if isinstance(raw_value, bytes):
            # Without decoding, bytes never compare equal to the str produced
            # by time.strftime, so the schedule would never fire on Python 3.
            raw_value = raw_value.decode("utf-8", "replace")
        return raw_value == current_hhmm

    def _schedule(self, request):
        """Hand one request to the running engine.

        Newer Scrapy releases accept only the request; older ones require the
        spider as a second argument, which surfaces as a ``TypeError`` when the
        call is bound, before anything has been scheduled.
        """
        engine = self.crawler.engine
        try:
            engine.crawl(request)
        except TypeError:
            engine.crawl(request, self.spider)

    def collect(self):
        """Re-queue the spider's start requests when the scheduled minute arrives."""
        now = time.localtime()
        CRAWLDAY = self.server.get("CRAWLDAY")
        if not self._matches_schedule(CRAWLDAY, time.strftime("%H:%M", now)):
            return

        # The loop ticks several times within the matching minute; only the
        # first tick of that minute may trigger a re-crawl.
        minute_stamp = time.strftime("%Y-%m-%d %H:%M", now)
        if minute_stamp == self._last_trigger:
            return
        self._last_trigger = minute_stamp

        logger.info("action crawl")
        # Clear the dupefilter so the re-issued requests are not dropped as
        # already seen.
        self.server.delete(self.spider.name + ":dupefilter")
        for request in self.spider.start_requests():
            self._schedule(request)

0 个答案:

没有答案