import scrapy
from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.http import Request
from scrapy import Item, Field
class NItems(Item):
creat = Field()
class NSpider(Spider):
def parse(self,response):
item = NItems()
for da in response.xpath('//div[@class="section-stream-content"]'):
item['link'] = da.xpath('//tr/td[2]/div[1]/h2/a/@href').extract()
for con in item['link']:
request = scrapy.Request(con, callback = self.parseDate)
request.meta['item'] = item
yield request
def parseDate(self, response):
item = response.meta['item']
hxs = Selector(response)
cr = hxs.xpath('//*[@class="byline"]/text() | //*[@id="yui_3_16_0_1_1428166780085_527"]/text() | //div[@class="metadata"]/p/strong/text() | //p[@class="update-time"]/text() | //p[@class="publisheddate"]/text() | //p[@class="date"]/text() | //div[@class="last_updated"]/p/a/text()').extract()
item['creat'].append(cr)
cr = hxs.xpath('//*[@class="byline"]/text() | //*[@id="yui_3_16_0_1_1428166780085_527"]/text() | //div[@class="metadata"]/p/strong/text() | //p[@class="update-time"]/text() | //p[@class="publisheddate"]/text() | //p[@class="date"]/text() | //div[@class="last_updated"]/p/a/text()').extract()
fot z in cr:
print z
我尝试从链接页面收集一些数据,但是当我尝试将收集的数据添加到项目列表时,我收到此错误。 为什么我在行上收到此错误--- item ['creat']。append(az)---在我的理解中,我在列表中插入一个列表
答案 0 :(得分:1)
您需要在item['creat']
方法中将parse()
初始化为空列表:
def parse(self,response):
item = NewsItems()
for da in response.xpath('//div[@class="section-stream-content"]'):
item['link'] = da.xpath('//tr/td[2]/div[1]/h2/a/@href').extract()
item['creat'] = []
...
或者,在cr
中将其设为parseDate()
:
item['creat'] = cr