我是 Python 编程的新手。我想知道如何把抓取到的数据点整理到同一类别(同一列)之下,尤其是当爬虫已经翻到下一页继续抓取时,即希望得到如下结构:
温度 预测 位置
温度1 预测1 位置1
温度2 预测2 位置2
我最初使用yield提供我想要的数据:
def parse_time(self, response):
    """Scrape the first two hourly columns of a forecast page, then paginate.

    For every element matching the "hourly-table overview-hourly" class,
    one dict is yielded. It holds the page heading (``name``) and, for table
    columns 1 and 2, the hour label, forecast, temperature, cloud cover,
    humidity, rain, ice and snow cells. Each value is the list returned by
    ``.extract()``; it is empty when the cell is missing.

    After the items, the "right-float" link is followed with this same
    callback unless its href contains the stop marker.

    Args:
        response: The page response; must provide ``xpath()`` and
            ``urljoin()``.

    Yields:
        Item dicts, followed by at most one ``Request`` for the next page.
    """
    # Every cell query has the same shape; only the table class, the row,
    # the column index and the element holding the text vary.
    cell_xpath = (
        '//*[@class="hourly-table {table}"]/table/{row}/td[{col}]/{tag}/text()'
    )
    # (key template, table class suffix, row inside the table, text element)
    fields = (
        ('{label}_hour', 'overview-hourly', 'thead/tr[1]', 'div'),
        ('forecast_{col}', 'overview-hourly', 'tbody/tr[1]', 'span'),
        ('temperature_{col}', 'overview-hourly', 'tbody/tr[2]', 'span'),
        ('cloud_cover_{col}', 'sky-hourly', 'tbody/tr[2]', 'span'),
        ('humidity_{col}', 'sky-hourly', 'tbody/tr[3]', 'span'),
        ('rain_{col}', 'precip-hourly', 'tbody/tr[1]', 'span'),
        ('ice_{col}', 'precip-hourly', 'tbody/tr[2]', 'span'),
        ('snow_{col}', 'precip-hourly', 'tbody/tr[3]', 'span'),
    )
    # Table column index paired with the word used in the "<label>_hour" key.
    columns = ((1, 'first'), (2, 'second'))

    for hour in response.xpath('//*[@class= "hourly-table overview-hourly"]'):
        # NOTE: every query below starts with "//", so it is evaluated
        # against the whole document, not relative to `hour`. That is what
        # lets the sky/precip tables be reached from the overview table.
        item = {'name': hour.xpath('//h1/text()').extract()}
        for col, label in columns:
            for key, table, row, tag in fields:
                query = cell_xpath.format(
                    table=table, row=row, col=col, tag=tag
                )
                item[key.format(label=label, col=col)] = (
                    hour.xpath(query).extract()
                )
        yield item

    next_eight = response.xpath('//*[@class="right-float"]/@href').extract_first()
    # NOTE(review): the stop marker is hard-coded; presumably 262311 is the
    # location id and hour=23 the last page of the day -- confirm.
    if next_eight and "/262311?hour=23" not in next_eight:
        yield Request(response.urljoin(next_eight), callback=self.parse_time)