我正在从网址中读取csv文件,并将所有csv文件合并到一个csv中。 最终的csv不包含从 https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni-20201023.csv
到 https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni-20201028.csv 的这些文件,
其他所有文件都没有问题。 我已经尝试了各种办法,所有的csv看起来都正常,我不明白为什么这些csv不行(从... 20201023.csv到... 20201028.csv)。 如果我单独读取它们,都能正常读取,所以问题应该出在pd.concat
中。可以帮忙吗?
'''
import pandas as pd
from pandas import read_csv
import requests
import io
from matplotlib import pyplot
from datetime import datetime
from datetime import timedelta
from datetime import date
import matplotlib.pyplot as plt
#url = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni-20200224.csv'
# Download the daily per-region CSV files, one per day from begin_date up to
# yesterday, and stack them into a single DataFrame indexed by "data".
begin_date = date(2020, 10, 23)
# Read "today" exactly once so the day count and the generated dates cannot
# disagree if the script happens to run across midnight.
today = date.today()
delta3 = today - begin_date
n = delta3.days
url_path_base = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni-'

# Newest first: yesterday, the day before, ... back to begin_date itself.
data_vec = [
    (today - timedelta(days=x + 1)).strftime('%Y%m%d') for x in range(n)
]
urls = [url_path_base + day + '.csv' for day in data_vec]

ds = []
for f in urls:
    # A timeout keeps one stalled download from hanging the whole script.
    response = requests.get(f, timeout=30)
    # Fail loudly on an HTTP error; otherwise the error body would be handed
    # to read_csv and concatenated as if it were data.
    response.raise_for_status()
    s = response.content
    ds.append(pd.read_csv(io.StringIO(s.decode('utf-8'))))

frame = pd.concat(ds, axis=0, ignore_index=True)
# set_index returns a new DataFrame rather than modifying in place, so the
# result must be kept for "data" to actually become the index.
frame = frame.set_index("data")
'''
答案 0 :(得分:0)
Works for me as you've written your code except
frame.set_index("data")
should be
frame.set_index("data", inplace=True)
# 21 datapoints per file
In [25]: frame['stato'].groupby(frame.index).count()
Out[25]:
data
2020-10-23T17:00:00 21
2020-10-24T17:00:00 21
2020-10-25T17:00:00 21
2020-10-26T17:00:00 21
2020-10-27T17:00:00 21
2020-10-28T17:00:00 21
2020-10-29T17:00:00 21
2020-10-30T17:00:00 21
2020-10-31T17:00:00 21
2020-11-01T17:00:00 21
2020-11-02T17:00:00 21
2020-11-03T17:00:00 21
2020-11-04T17:00:00 21
2020-11-05T17:00:00 21
2020-11-06T17:00:00 21
2020-11-07T17:00:00 21
2020-11-08T17:00:00 21
2020-11-09T17:00:00 21
2020-11-10T17:00:00 21
2020-11-11T17:00:00 21
Name: stato, dtype: int64