Getting {KeyError} <classname 'spidername' at 0x7fb3f6b9c790> from self.files.pop(spider) in a Scrapy pipeline

Date: 2018-10-20 14:35:34

Tags: python-2.7 scrapy pipeline

When I run Scrapy to export multiple CSVs from a single spider, I get a {KeyError} from self.files.pop(spider).

Here is my pipeline:

class PhysiciansPipeline(object):
    def __init__(self, spider):
        self.files = {}
        full_path = result_path(spider.result_path_type, spider.name)
        self.exporter1 = CsvItemExporter(fields_to_export=PhysiciansItem.fields.keys(), file=open(full_path + 'file1.csv', 'wb'))
        self.exporter2 = CsvItemExporter(fields_to_export=SpecialtiesItem.fields.keys(), file=open(full_path + 'file2.csv', 'wb'))
        self.exporter3 = CsvItemExporter(fields_to_export=LocationsItem.fields.keys(), file=open(full_path + 'file3.csv', 'wb'))

    @classmethod
    def from_crawler(cls, crawler):
        spider = crawler.spider
        pipeline = cls(spider)
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()
        self.exporter3.start_exporting()

    def spider_closed(self, spider):
        self.exporter1.finish_exporting()
        self.exporter2.finish_exporting()
        self.exporter3.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter1.export_item(item)
        self.exporter2.export_item(item)
        self.exporter3.export_item(item)
        return item

I also added this to settings.py:

ITEM_PIPELINES = {
   'physicians.pipelines.PhysiciansPipeline': 300,
}

What is the problem with this code? Thanks.

1 Answer:

Answer 0 (score: 0)

I don't see any values ever being put into self.files.

As for the error, it means that the key spider does not exist in self.files.

I guess you were looking for

self.files.pop(spider.name)
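
For reference, dict.pop(key) raises KeyError whenever the key is missing, and self.files here starts out empty and never gains an entry, so the pop fails no matter which key is used. A quick illustration (the spider name is made up):

files = {}
# files.pop('physicians')  # would raise KeyError: 'physicians'
# passing a default avoids the exception and returns None instead
f = files.pop('physicians', None)
if f is not None:
    f.close()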

Edit:

# imports the pipeline needs; the item classes and the result_path/clean_csv
# helpers are assumed to come from the question's own project
from scrapy import signals
from scrapy.exporters import CsvItemExporter

class PhysiciansPipeline(object):
    def __init__(self, spider):
        self.files = []
        # keep the path on self so spider_closed can reuse it
        self.full_path = result_path(spider.result_path_type, spider.name)

        file1 = open(self.full_path + 'physicians.csv', 'wb')
        self.files.append(file1)
        self.exporter1 = CsvItemExporter(fields_to_export=PhysiciansItem.fields.keys(), file=file1)

        file2 = open(self.full_path + 'specialities.csv', 'wb')
        self.files.append(file2)
        self.exporter2 = CsvItemExporter(fields_to_export=SpecialtiesItem.fields.keys(), file=file2)

        file3 = open(self.full_path + 'locations.csv', 'wb')
        self.files.append(file3)
        self.exporter3 = CsvItemExporter(fields_to_export=LocationsItem.fields.keys(), file=file3)

    @classmethod
    def from_crawler(cls, crawler):
        spider = crawler.spider
        pipeline = cls(spider)
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter1.start_exporting()
        self.exporter2.start_exporting()
        self.exporter3.start_exporting()

    def spider_closed(self, spider):
        self.exporter1.finish_exporting()
        self.exporter2.finish_exporting()
        self.exporter3.finish_exporting()
        for _file in self.files:
            _file.close()
        
        clean_csv(self.full_path)

    def process_item(self, item, spider):
        self.exporter1.export_item(item)
        self.exporter2.export_item(item)
        self.exporter3.export_item(item)
        return item
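
For comparison, self.files.pop(spider) looks like it was taken from the export-pipeline example in the Scrapy docs, where spider_opened stores the open file in a dict keyed by the spider object, so the matching pop in spider_closed succeeds. A minimal sketch of that pattern (single exporter; the class name and file name here are illustrative):

from scrapy import signals
from scrapy.exporters import CsvItemExporter

class SingleCsvPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        f = open('%s_items.csv' % spider.name, 'wb')
        self.files[spider] = f  # the key is stored here...
        self.exporter = CsvItemExporter(f)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        f = self.files.pop(spider)  # ...so this pop finds it
        f.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

The question's pipeline kept the pop from that example but dropped the spider_opened bookkeeping that populates the dict, which is exactly why the lookup fails.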