I'm new to Python. I'm trying to download and read a CSV from a URL. I have this code:
import csv
import requests
url = "http://www.aemet.es/es/eltiempo/observacion/ultimosdatos_8178D_datos-horarios.csv?k=clm&l=8178D&datos=det&w=0&f=temperatura&x=h24"
r = requests.get(url)
contenido = r.content
assert isinstance(contenido, basestring)
sin3lineas = '\r\n'.join( contenido.split('\r\n')[3:])
sin3lineasutf8 = sin3lineas.decode('latin-1').encode('utf-8')
print sin3lineasutf8
#cr = csv.reader(sin3lineasutf8,quotechar='"', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=False)
cr = csv.reader(sin3lineasutf8)
for row in cr:
    print row,
    print len(row)
The content that gets read is:
"Fecha y hora oficial","Temperatura (ºC)","Velocidad del viento (km/h)","Dirección del viento","Racha (km/h)","Dirección de racha","Precipitación (mm)","Presión (hPa)","Tendencia (hPa)","Humedad (%)"
"19/02/2016 00:00","4.4","0","Calma","6","Nordeste","0.0","937.0","1.0","88"
"18/02/2016 02:00","4.3","10","Oeste","25","Oeste","0.0","935.3","-0.2","60"
...
"18/02/2016 01:00","4.2","12","Oeste","30","Noroeste","0.0","935.7","0.0","58"
['Fecha y hora oficial'] 1
['', ''] 2
['Temperatura (\xc2\xbaC)'] 1
['', ''] 2
['Velocidad del viento (km/h)'] 1
['', ''] 2
['Direcci\xc3\xb3n del viento'] 1
['', ''] 2
...
[] 0
[] 0
Process finished with exit code 0
The parsed CSV comes out as lots of tiny rows instead of cells, and I don't know why. I've tried many parameters of csv.reader, but none of them solved the problem.
Any ideas?
Answer 0 (score: 3)
csv.reader expects an iterable of lines (a file-like object or a list of strings); handed a plain string, it iterates it character by character, which is what produces those fragmented rows. Putting response.text into a buffer worked for me:
import csv
from StringIO import StringIO
import requests
url = "http://www.aemet.es/es/eltiempo/observacion/ultimosdatos_8178D_datos-horarios.csv?k=clm&l=8178D&datos=det&w=0&f=temperatura&x=h24"
r = requests.get(url)
buffer = StringIO(r.text)
cr = csv.reader(buffer)
for row in cr:
    print(row)
This prints:
['Albacete']
['Actualizado: viernes', ' 19 febrero 2016 a las 00:42 hora oficial']
[]
['Fecha y hora oficial', 'Temperatura (ºC)', 'Velocidad del viento (km/h)', 'Dirección del viento', 'Racha (km/h)', 'Dirección de racha', 'Precipitación (mm)', 'Presión (hPa)', 'Tendencia (hPa)', 'Humedad (%)']
['19/02/2016 00:00', '4.4', '0', 'Calma', '6', 'Nordeste', '0.0', '937.0', '1.0', '88']
['18/02/2016 23:00', '4.7', '0', 'Calma', '5', 'Nordeste', '0.0', '936.9', '1.5', '86']
['18/02/2016 22:00', '4.7', '0', 'Calma', '5', 'Nordeste', '0.0', '936.7', '1.9', '84']
['18/02/2016 21:00', '5.3', '0', 'Calma', '5', 'Nordeste', '0.0', '936.0', '1.8', '83']
['18/02/2016 20:00', '5.9', '0', 'Calma', '5', 'Noroeste', '0.0', '935.4', '1.3', '82']
['18/02/2016 19:00', '6.4', '0', 'Calma', '10', 'Oeste', '0.0', '934.8', '1.2', '77']
['18/02/2016 18:00', '6.9', '2', 'Noroeste', '13', 'Noroeste', '0.0', '934.2', '0.6', '74']
['18/02/2016 17:00', '7.1', '4', 'Noroeste', '12', 'Oeste', '0.0', '934.1', '0.3', '73']
['18/02/2016 16:00', '7.2', '3', 'Noroeste', '12', 'Noroeste', '0.0', '933.6', '-0.7', '73']
['18/02/2016 15:00', '7.0', '4', 'Noroeste', '14', 'Noroeste', '0.0', '933.6', '-1.3', '77']
['18/02/2016 14:00', '6.6', '2', 'Noroeste', '16', 'Noroeste', '0.0', '933.8', '-1.1', '79']
['18/02/2016 13:00', '6.3', '5', 'Noroeste', '18', 'Noroeste', '0.0', '934.3', '-0.5', '82']
['18/02/2016 12:00', '5.5', '5', 'Noroeste', '14', 'Oeste', '0.0', '934.9', '0.1', '87']
['18/02/2016 11:00', '4.6', '3', 'Noroeste', '9', 'Oeste', '0.0', '934.9', '1.2', '95']
['18/02/2016 10:00', '3.4', '0', 'Calma', '5', 'Nordeste', '0.0', '934.8', '0.6', '95']
['18/02/2016 09:00', '3.1', '0', 'Calma', '5', 'Norte', '0.0', '934.8', '0.7', '94']
['18/02/2016 08:00', '3.4', '0', 'Calma', '5', 'Oeste', '0.0', '933.7', '-0.2', '91']
['18/02/2016 07:00', '3.5', '0', 'Calma', '10', 'Oeste', '0.0', '934.2', '-0.2', '88']
['18/02/2016 06:00', '3.6', '3', 'Noroeste', '15', 'Oeste', '0.0', '934.1', '-0.9', '86']
['18/02/2016 05:00', '3.9', '5', 'Oeste', '18', 'Sudoeste', '0.0', '933.9', '-1.4', '73']
['18/02/2016 04:00', '4.1', '7', 'Oeste', '17', 'Oeste', '0.0', '934.4', '-1.3', '68']
['18/02/2016 03:00', '4.2', '7', 'Oeste', '23', 'Noroeste', '0.0', '935.0', '-0.6', '64']
['18/02/2016 02:00', '4.3', '10', 'Oeste', '25', 'Oeste', '0.0', '935.3', '-0.2', '60']
['18/02/2016 01:00', '4.2', '12', 'Oeste', '30', 'Noroeste', '0.0', '935.7', '0.0', '58']
If you want to skip the first 4 rows to get to the actual data, you can use islice():
from itertools import islice
for row in islice(cr, 4, None):
    print(row)
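On Python 3 the same idea works with io.StringIO (the StringIO module is Python 2 only). Below is a minimal sketch, assuming the AEMET file keeps the three-line preamble shown above (station name, "Actualizado..." line, blank line); it also switches to csv.DictReader so the header row supplies the field names:
import csv
import io
from itertools import islice
import requests

url = "http://www.aemet.es/es/eltiempo/observacion/ultimosdatos_8178D_datos-horarios.csv?k=clm&l=8178D&datos=det&w=0&f=temperatura&x=h24"
r = requests.get(url)

# io.StringIO is the Python 3 replacement for StringIO.StringIO
buffer = io.StringIO(r.text)

# Skip the three preamble lines so the header row becomes the field names
# (assumes the layout shown above; adjust the count if the file changes).
cr = csv.DictReader(islice(buffer, 3, None))
for row in cr:
    print(row['Fecha y hora oficial'], row['Temperatura (ºC)'])
DictReader keeps the loop readable when you only need a couple of columns, since values are looked up by header name instead of position.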