return self._values [key] exception.KeyError:Python

时间:2015-04-04 18:56:03

标签: python python-2.7 scrapy scrapy-spider

    import scrapy
    from scrapy.spider import Spider
    from scrapy.selector import Selector
    from scrapy.http import Request
    from scrapy import Item, Field


    class NItems(Item):

      creat = Field()


    class NSpider(Spider):

      def parse(self,response):

        item = NItems()
        for da in response.xpath('//div[@class="section-stream-content"]'):

          item['link'] = da.xpath('//tr/td[2]/div[1]/h2/a/@href').extract()

          for con in item['link']:

            request = scrapy.Request(con, callback = self.parseDate)
            request.meta['item'] = item
            yield request

      def parseDate(self, response):

        item = response.meta['item']

        hxs = Selector(response)

        cr = hxs.xpath('//*[@class="byline"]/text() | //*[@id="yui_3_16_0_1_1428166780085_527"]/text() | //div[@class="metadata"]/p/strong/text() | //p[@class="update-time"]/text() | //p[@class="publisheddate"]/text() | //p[@class="date"]/text() | //div[@class="last_updated"]/p/a/text()').extract()

        item['creat'].append(cr)   

        cr = hxs.xpath('//*[@class="byline"]/text() | //*[@id="yui_3_16_0_1_1428166780085_527"]/text() | //div[@class="metadata"]/p/strong/text() | //p[@class="update-time"]/text() | //p[@class="publisheddate"]/text() | //p[@class="date"]/text() | //div[@class="last_updated"]/p/a/text()').extract()

        fot z in cr:

          print z

我尝试从链接页面收集一些数据,但是当我尝试将收集的数据添加到项目列表时,我收到此错误。 为什么我在行上收到此错误--- item ['creat']。append(az)---在我的理解中,我在列表中插入一个列表

enter image description here

1 个答案:

答案 0 :(得分:1)

您需要在item['creat']方法中将parse()初始化为空列表:

def parse(self,response):
    item = NewsItems()
    for da in response.xpath('//div[@class="section-stream-content"]'):
        item['link'] = da.xpath('//tr/td[2]/div[1]/h2/a/@href').extract()
        item['creat'] = []
        ... 

或者,在cr中将其设为parseDate()

item['creat'] = cr