我正在尝试使用scrapy从站点进行爬网,并使用站点内容中的链接。但是,当我执行此操作时,在解析中yield statemant上方的行上出现错误:
TypeError: 'NoneType' object does not support item assignment
这是我的代码:
class PostsSpider(scrapy.Spider):
name = "posts"
start_urls = ['https://www.nba.com/teams/bucks']
allowed_domains = ['nba.com']
def parse(self, response):
for post in response.css('.nba-player-index section section'):
playerPage = response.urljoin(post.css('a').attrib['href'])
item = yield scrapy.Request(playerPage, callback=self.helper)
item['number'] = post.css('span.nba-player-trending-item__number::text').get(),
yield item
def helper(self, response):
print("--->"+response.css("title").get())
item = Item()
item['title'] = response.css("title::text").get()
yield item
class Item(scrapy.Item):
# define the fields for your item here like:
number = scrapy.Field()
title = scrapy.Field()
ppg = scrapy.Field()
答案 0 :(得分:0)
您可以做的是将number
数据传递给助手,而不要这样做。
像这样:
def parse(self, response):
for post in response.css('.nba-player-index section section'):
playerPage = response.urljoin(post.css('a').attrib['href'])
meta = response.meta.copy()
meta['number'] = post.css('span.nba-player-trending-item__number::text').get()
yield scrapy.Request(playerPage, callback=self.helper, meta=meta)
def helper(self, response):
# here you will get `number` in response.meta['number'] that you can yield further.
item = Item()
item['number'] = response.meta.get('number)
yield item