import pandas as pd
import pdb, random
# One timestamp per calendar day of 1950.
dates = pd.date_range('1950-01-01', '1950-12-31', freq='D')
# One random integer in [0, 999] per day. ``range`` replaces the
# Python-2-only ``xrange`` so the snippet also runs on Python 3.
data = [int(1000 * random.random()) for _ in range(len(dates))]
# Daily series of the random samples, indexed by date.
cum_data = pd.Series(data, index=dates)
如何将这一年的数据序列(Series)按日期重复到接下来的10年?
答案 0 :(得分:2)
这应该有用。
对于您提供的年份列表中的每一年,我使用相同的初始数据集扩展数据,除非它是闰年。
在闰年,我会在第60天(1月的31天加上2月的第29天,即2月29日)的位置插入一个条目,
其值由 fill_leap 指定。
import pandas as pd
import pdb, random
def multiYearData(data, start_y, until_y, fill_leap):
    """Repeat one year of daily values for every year in [start_y, until_y).

    Parameters
    ----------
    data : sequence
        Daily values for one non-leap year, 1 January first. The leap-day
        insertion position assumes the usual 365-entry layout.
    start_y : int
        First calendar year to generate (inclusive).
    until_y : int
        Calendar year at which to stop (exclusive).
    fill_leap : object
        Value inserted as the 29 February entry of every leap year.

    Returns
    -------
    list
        The concatenated per-year values; leap years contribute one more
        entry than ``data`` holds. Empty when ``start_y >= until_y``.
    """
    def leapyr(n):
        """Return True when year *n* is a leap year (Gregorian rule)."""
        return n % 4 == 0 and (n % 100 != 0 or n % 400 == 0)

    # 31 January days + 28 February days come before 29 February, so the
    # leap entry belongs at zero-based index 59 (the 60th day of the year).
    # Index 60 would land on 1 March and shift 29 February's neighbour.
    leap_index = 31 + 28

    # Copy once so any sequence type works and the caller's data is untouched.
    base_year = list(data)
    leap_year = base_year[:leap_index] + [fill_leap] + base_year[leap_index:]

    multi_year = []
    for y in range(start_y, until_y):
        multi_year.extend(leap_year if leapyr(y) else base_year)
    return multi_year
# Half-open year span [start_y, until_y): the ten calendar years 1950-1959.
start_y = 1950
until_y = 1960
# Daily index from 1 January of the first year through 31 December of the
# last generated year (until_y itself is excluded).
dates = pd.date_range(str(start_y) + '-01-01', str(until_y - 1) + '-12-31', freq='D')
# One non-leap year of random samples in [0, 999]. ``range`` replaces the
# Python-2-only ``xrange`` so the script also runs on Python 3.
data = [int(1000. * random.random()) for _ in range(365)]
# fill_leap=100000 is far above every random sample, which makes the
# inserted leap-day entries easy to identify in the result.
data = multiYearData(data, start_y, until_y, fill_leap=100000)
cum_data = pd.Series(data, index=dates)
leapyr() 函数借用自之前的一篇帖子(this previous post)。
使用 fill_leap=100000 这样明显偏大的填充值,您可以通过绘图检查闰日是否被插入到了正确的位置:
import matplotlib.pyplot as plt
# Draw the series before showing: the snippet as given never plots anything,
# so plt.show() alone would not display the data the text asks you to inspect.
cum_data.plot()
plt.show()
答案 1 :(得分:1)
这很有效。
它使用字典推导式为每个月的每一天生成一个随机数(2012年是闰年,因此字典中也包含2月29日对应的随机数,它会应用于所有闰年的2月29日)。
然后创建一个 Pandas Series:对日期索引中的每个日期,按其月份和日期在字典中查找对应的随机数。
from calendar import monthrange
from numpy.random import rand
# Both endpoints are part of the generated range, so the index runs from
# 1950-01-01 through 1960-01-01 inclusive.
start_date = pd.Timestamp('1950-1-1')
end_date = pd.Timestamp('1960-1-1')
dates = pd.date_range(start_date, end_date, freq='D')
# One random number per (month, day) pair. Month lengths come from 2012, a
# leap year, so 29 February has an entry that every leap year can reuse.
data_dict = {
    month: {day: rand() for day in range(1, monthrange(2012, month)[1] + 1)}
    for month in range(1, 13)
}
# A list comprehension (rather than a lazy ``map``) hands pandas a concrete
# sequence on both Python 2 and 3; every (month, day) key is guaranteed to
# exist, so plain indexing replaces the chained ``.get`` calls.
cum_data = pd.Series([data_dict[ts.month][ts.day] for ts in dates], index=dates)
示例输出
>>> {d.year: v for d, v in cum_data.iteritems() if d.month == 12 and d.day==31}
{1950: 0.66380331904182033,
1951: 0.66380331904182033,
1952: 0.66380331904182033,
1953: 0.66380331904182033,
1954: 0.66380331904182033,
1955: 0.66380331904182033,
1956: 0.66380331904182033,
1957: 0.66380331904182033,
1958: 0.66380331904182033,
1959: 0.66380331904182033}
>>> cum_data.iloc[:10]
1950-01-01 0.539305
1950-01-02 0.358602
1950-01-03 0.388185
1950-01-04 0.089234
1950-01-05 0.523388
1950-01-06 0.983208
1950-01-07 0.882484
1950-01-08 0.597809
1950-01-09 0.797835
1950-01-10 0.484002
Freq: D, dtype: float64
>>> side_by_side(cum_data['1952-2-25':'1952-3-1'],
cum_data['1953-2-25':'1953-3-1'],
cum_data['1956-2-25':'1956-3-1'])
1952-02-25 0.711973 1953-02-25 0.711973 1956-02-25 0.711973
1952-02-26 0.480222 1953-02-26 0.480222 1956-02-26 0.480222
1952-02-27 0.558472 1953-02-27 0.558472 1956-02-27 0.558472
1952-02-28 0.058541 1953-02-28 0.058541 1956-02-28 0.058541
1952-02-29 0.019080 1953-03-01 0.430778 1956-02-29 0.019080
1952-03-01 0.430778 Freq: D, dtype: float64 1956-03-01 0.430778
Freq: D, dtype: float64 Freq: D, dtype: float64