我收到TypeError:'请求'尝试访问从辅助Web请求传回的数据时,对象不可订阅:
import scrapy
class MyItem(scrapy.Item):
main_url = scrapy.Field()
addr_name = scrapy.Field()
addr = scrapy.Field()
addr_city = scrapy.Field()
class ServiceCanadaSpider(scrapy.Spider):
name = 'servicecan'
start_urls = ['http://www.servicecanada.gc.ca/tbsc-fsco/sc-lst.jsp?prov=AB&lang=eng']
def parse(self, response):
with open('test', 'w') as f:
for title in response.xpath('//li/ul/li/a'):
f.write(title.xpath('text()').extract_first())
#get url for info page
url='http://www.servicecanada.gc.ca' + title.xpath('@href').extract_first()
#parse info page
item = MyItem()
request = scrapy.Request(url, callback=self.parse_info_page)
request.meta['item'] = item
f.write(',' + url)
yield request
f.write(',' + request['addr_name'])
#f.write(',' + request.addr)
#f.write(',' + request.addr_city)
f.write('\n')
def parse_info_page(self, response):
item = response.meta['item']
item['main_url'] = response.url
if len(response.xpath('//td[@id="offInfo"]/text()')) == 3:
item['addr_name']='';
item['addr'] = response.xpath('//td[@id="offInfo"]/text()').extract()[0].replace('\n','')
item['addr_city'] = response.xpath('//td[@id="offInfo"]/text()').extract()[1].replace('\n','')
else:
item['addr_name']=response.xpath('//td[@id="offInfo"]/text()').extract()[0].replace('\n','')
item['addr'] = response.xpath('//td[@id="offInfo"]/text()').extract()[1].replace('\n','')
item['addr_city'] = response.xpath('//td[@id="offInfo"]/text()').extract()[2].replace('\n','')
return [item]
当我提出请求时,我可以看到它的MyItem类中的数据......
{'addr': ' 802 Bow Valley Trail',
'addr_city': ' Canmore, Alberta',
'addr_name': ' Canmore Gateway Shops - Building C, Suite 113',
'main_url': 'http://www.servicecanada.gc.ca/tbsc-fsco/sc-dsp.jsp?rc=4865&lang=eng'}
答案 0 :(得分:0)
Request
类确实不支持订阅,即使用[]
运算符。
如果您想通过其Request
属性访问附加到meta
实例的对象的字段,则必须明确地执行此操作:
request = scrapy.Request(url, callback=self.parse_info_page)
request.meta['item'] = item
f.write(',' + request.meta['item'].addr_name)