我尝试使用 Python Scrapy 创建一个解析器,用于从网页中提取一些字段。我遇到的问题是,无法以正确的方式把参数传递给 spider 类中的“target_page”方法。我知道我从“parse”方法向“target_page”传递“caption”和“value”字段的方式一点也不 pythonic。但我之所以这样写,是因为我确实不知道如何以恰当的方式把上述字段传入“target_page”方法。我该怎么做?提前谢谢。
import scrapy
class BrokerSpider(scrapy.Spider):
    """Scrape signal listings and combine each row with data from its detail page.

    ``parse`` reads the caption and price of every signal row on a listing
    page and schedules a request for the row's detail page. ``target_page``
    then merges those listing fields with the fields found on the detail page.
    """

    name = "brokersp"
    # Listing pages 1 through 9.
    start_urls = [
        "https://www.mql5.com/en/signals/mt4/page{0}".format(page_num)
        for page_num in range(1, 10)
    ]

    def parse(self, response):
        """Yield one detail-page request per signal row on a listing page.

        The row's caption and price are attached to the request through
        ``meta`` so the callback can read them back from ``response.meta``.
        ``scrapy.Request`` takes the URL as its first positional argument, so
        extra data must not be passed positionally.
        """
        for links in response.css("div#signals-table div.signal"):
            caption = links.css('span.name::text').extract_first()
            value = links.css('div.col-price::text').extract_first()
            link = links.css('a.signal-avatar::attr(href)').extract_first()
            yield scrapy.Request(
                url=link,
                callback=self.target_page,
                meta={'caption': caption, 'value': value},
            )

    def target_page(self, response):
        """Yield one item combining the listing fields with detail-page fields.

        Scrapy invokes a callback with the response as its first argument, so
        the caption and price stored by ``parse`` are read from
        ``response.meta`` instead of being declared as extra parameters.
        """
        title = response.meta['caption']
        price = response.meta['value']
        jobber = response.css('div.header span a::text').extract_first()
        profit = response.css('div.cell.total a.blue::text').extract_first()
        yield {'title': title, 'price': price, 'jobber': jobber, 'profit': profit}
答案 0 :(得分:3)
您可以使用请求(Request)的 meta 属性(参见 Scrapy 文档)在回调之间传递数据:
def parse(self, response):
    """Yield one detail-page request per signal row on the listing page.

    Each request carries the row's caption and price in its ``meta`` dict
    under the keys ``'caption'`` and ``'value'``, and is handled by
    ``self.target_page``.
    """
    for row in response.css("div#signals-table div.signal"):
        # Fields taken from the listing row, to be read back in the callback.
        carried = {
            'caption': row.css('span.name::text').extract_first(),
            'value': row.css('div.col-price::text').extract_first(),
        }
        detail_url = row.css('a.signal-avatar::attr(href)').extract_first()
        yield scrapy.Request(
            url=detail_url,
            callback=self.target_page,
            meta=carried,
        )
def target_page(self, response):
    """Yield one item merging the values carried in ``response.meta`` with
    the fields extracted from this page.

    ``'caption'`` and ``'value'`` are read from ``response.meta``; the
    ``jobber`` and ``profit`` fields come from CSS selectors on the response.
    """
    carried = response.meta
    yield {
        'title': carried['caption'],
        'price': carried['value'],
        'jobber': response.css('div.header span a::text').extract_first(),
        'profit': response.css('div.cell.total a.blue::text').extract_first(),
    }