class TimCustomStatsExtension(object):
    """Scrapy extension that polls Redis for a scheduled crawl time.

    Every ``self.time`` seconds a Twisted ``LoopingCall`` compares the
    current local HH:MM against the ``CRAWLDAY`` key in Redis; on a match
    it clears the spider's Redis dupefilter and re-schedules the spider's
    start requests through the running engine.
    """

    def __init__(self, stats, crawler):
        self.stats = stats
        self.crawler = crawler
        # Polling interval in seconds for the LoopingCall.
        self.time = 5
        self.server = Redis()

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Standard Scrapy extension factory: build instance, wire signals."""
        instance = cls(crawler.stats, crawler)
        crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signal=signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Start the periodic check when the spider opens."""
        self.spider = spider
        self.tsk = task.LoopingCall(self.collect)
        self.tsk.start(self.time, now=True)

    def spider_closed(self, spider):
        """Stop the periodic check when the spider closes."""
        if self.tsk.running:
            self.tsk.stop()

    def collect(self):
        """Trigger a re-crawl when the scheduled time in Redis is reached."""
        crawlday = self.server.get("CRAWLDAY")
        # BUG FIX: redis-py returns bytes unless decode_responses=True was
        # passed to Redis(); decoding here makes the comparison against the
        # strftime() string able to succeed at all.
        if isinstance(crawlday, bytes):
            crawlday = crawlday.decode("utf-8")
        if time.strftime("%H:%M", time.localtime()) == crawlday:
            logger.info("action crawl")
            self.server.delete(self.spider.name + ":dupefilter")
            # BUG FIX (answers the original in-code question): calling
            # start_requests() only *creates* the requests; each one must be
            # handed to the engine to actually be scheduled and downloaded.
            # NOTE(review): in very recent Scrapy versions engine.crawl()
            # takes only the request — confirm against the pinned version.
            for req in self.spider.start_requests():
                self.crawler.engine.crawl(req, self.spider)