I want to take a value obtained in parse, use it to request another URL, and parse that second page as well. How can I do this?
from scrapy import Spider
from scrapy.selector import Selector

from stack.items import StackItem


class StackSpider(Spider):
    name = "stack"
    allowed_domains = ["*"]
    global n
    #n = 1997
    start_urls = ['https://www.melon.com/chart/age/list.htm?chartType=YE&chartGenre=KPOP&chartDate=2010',]
    def parse(self, response):
        url = 'https://www.melon.com/song/detail.htm?songId='
        questions = Selector(response).xpath('//*[@id="frm"]/table/tbody/tr')
        for question in questions:
            item = StackItem()
            item['musicid'] = question.xpath('td/div/input/@value').extract()[0]
            item['title'] = question.xpath('td[4]/div/div/div/span/strong/a/@title').extract()
            item['artlist'] = question.xpath(
                'td[4]/div/div/div[2]/div[1]/a/text()').extract()
            item['album'] = question.xpath(
                'td[4]/div/div/div[2]/div[2]/a/text()').extract()
            item['sunwhi'] = question.xpath(
                'td[2]/div/span/text()').extract()[0]
            response_url = requests.get(url + musicid)

    def parse(self, response):
        questions = Selector(response).xpath('//*[@id="downloadfrm"]/div/div/div[2]/div[2]/dl/dd')
        for question in questions:
            item = StackItem()
            item['album'] = question.xpath('a/text()').extract()[0]
            yield item
Answer (score 0):
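The spider above has two problems: the second def parse silently replaces the first (a class can only keep one method per name), and requests.get() is a blocking call whose response Scrapy never parses (it also uses musicid as a bare name, which is undefined). The Scrapy way is to yield a scrapy.Request for the detail URL with a callback, and hand the partially filled item to that callback via meta. A sketch along those lines, keeping the question's XPaths and field names (the callback name parse_detail is my own choice):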
import scrapy
from scrapy import Spider
from scrapy.selector import Selector

from stack.items import StackItem


class StackSpider(Spider):
    name = "stack"
    allowed_domains = ["melon.com"]  # restrict to the real domain; "*" is not a valid entry here
    start_urls = ['https://www.melon.com/chart/age/list.htm?chartType=YE&chartGenre=KPOP&chartDate=2010',]

    def parse(self, response):
        url = 'https://www.melon.com/song/detail.htm?songId='
        questions = Selector(response).xpath('//*[@id="frm"]/table/tbody/tr')
        for question in questions:
            item = StackItem()
            item['musicid'] = question.xpath('td/div/input/@value').extract()[0]
            item['title'] = question.xpath('td[4]/div/div/div/span/strong/a/@title').extract()
            item['artlist'] = question.xpath(
                'td[4]/div/div/div[2]/div[1]/a/text()').extract()
            item['album'] = question.xpath(
                'td[4]/div/div/div[2]/div[2]/a/text()').extract()
            item['sunwhi'] = question.xpath(
                'td[2]/div/span/text()').extract()[0]
            # Let Scrapy fetch the detail page instead of calling requests.get(),
            # and carry the partially filled item along in meta.
            yield scrapy.Request(url + item['musicid'],
                                 callback=self.parse_detail,
                                 meta={'item': item})

    def parse_detail(self, response):
        # Pick up the item filled in parse() and add the value from the detail page.
        item = response.meta['item']
        questions = Selector(response).xpath(
            '//*[@id="downloadfrm"]/div/div/div[2]/div[2]/dl/dd')
        for question in questions:
            item['album'] = question.xpath('a/text()').extract()[0]
        yield item
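On Scrapy 1.7+ you can pass the item with cb_kwargs={'item': item} instead of meta, which keeps meta free for Scrapy's own keys; either way works for chaining one parse into another. For completeness, a minimal stack/items.py with the fields the spider uses (a sketch; your project may already define more):

import scrapy


class StackItem(scrapy.Item):
    # Fields referenced by the spider above.
    musicid = scrapy.Field()
    title = scrapy.Field()
    artlist = scrapy.Field()
    album = scrapy.Field()
    sunwhi = scrapy.Field()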