我正在使用此dataframe
(df1
):
# header=None tells pandas the file has no header row, so the CSV's first line
# is read as data and the columns are labelled with integers.
df1 = pd.read_csv('City_Zhvi_AllHomes.csv',header=None)
其中包含以“年-月”(月份为01-12)格式命名的列。现在,我必须创建一个新的dataframe
,把df1
中的数据按4个季度进行汇总。这就是我所做的:
# Zero-padded month labels; the extra '00' entry at index 0 makes Month[n] the
# label of month n.
Month = ['00','01','02','03','04','05','06','07','08','09','10','11','12']
# Two-digit year suffixes '00' through '16', prefixed with '20' below.
Year=['00','01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16']
for i in range(0,15):
for j in range(0,12):
# NOTE(review): pd.DataFrame[...] subscripts the DataFrame class instead of
# calling it; in Python 2 this raises
# "'type' object has no attribute '__getitem__'".
# NOTE(review): Month only has indices 0-12, so Month[j+3] is out of range
# once j reaches 10.
newdf = pd.DataFrame[df1.loc[2,'20'+Year[i]+'-'+Month[j+1]] + df1.loc[2,'20'+Year[i]+'-'+Month[j+2]]+ df1.loc[2,'20'+Year[i]+'-'+Month[j+3]],columns = ['Q1','Q2','Q3','Q4'] ]
# NOTE(review): rebinding j here does not change the values that range()
# yields on later iterations.
j = j + 3
print newdf .
我已经专门为第二行做了这个,但我需要为整个数据集执行此操作
现在我收到了这个错误:
'type' object has no attribute '__getitem__'('type'对象没有属性'__getitem__')
以下是样本数据:
0 CountyName SizeRank 2000-01 2000-02 2000-03 2000-04
1 Queens 1 NaN NaN NaN NaN
2 Los Angeles 2 204400 207000 209800 212300
3 Cook 3 136800 138300 140100 141900
4 Philadelphia 4 52700 53100 53200 53400
newdf应该包含:
Q1
(204400+207000+209800)
同样Q2,Q3,Q4
我该如何处理?
答案 0 :(得分:1)
我认为您可以沿列方向(axis=1)使用resample
并配合sum
进行汇总,但首先必须用set_index
把CountyName
和SizeRank
这两列设为索引:
#remove header=None for first row in csv as columns
# Read the CSV with its first row as the column labels, then move the
# descriptive (non-date) columns into the index so only monthly columns remain.
# With the real data set, index by every non-date column instead:
# ['RegionID', 'RegionName', 'State', 'Metro', 'CountyName', 'SizeRank']
df = pd.read_csv('City_Zhvi_AllHomes.csv').set_index(['CountyName', 'SizeRank'])
# Parse the 'YYYY-MM' labels as timestamps and convert them to monthly periods.
df.columns = pd.to_datetime(df.columns).to_period(freq='M')
print(df)
2000-01 2000-02 2000-03 2000-04
CountyName SizeRank
Queens 1 NaN NaN NaN NaN
Los Angeles 2 204400.0 207000.0 209800.0 212300.0
Cook 3 136800.0 138300.0 140100.0 141900.0
Philadelphia 4 52700.0 53100.0 53200.0 53400.0
# Show the column labels after the conversion to monthly periods.
print (df.columns)
PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04'], dtype='period[M]', freq='M')
# Sum the monthly columns of every row into calendar quarters.
# DataFrame.resample(..., axis=1) is deprecated in recent pandas, so the
# columns are grouped by quarter on the transposed frame and transposed back.
# min_count=1 keeps a quarter with no valid months as NaN (matching the
# expected output) instead of collapsing it to 0.
quarters = df.columns.asfreq('Q')
df = df.T.groupby(quarters).sum(min_count=1).T
print(df)
2000Q1 2000Q2
CountyName SizeRank
Queens 1 NaN NaN
Los Angeles 2 621200.0 212300.0
Cook 3 415200.0 141900.0
Philadelphia 4 159000.0 53400.0
其中,列名要先用to_datetime
转换,再用to_period
转换为月度周期:
# Sorts +array+ in place with bubble sort, using the given block as comparator.
#
# The block receives two elements (left, right) and must return a number:
# a positive result means left belongs after right, so the pair is swapped.
#
# Returns the same array object, now sorted. Empty and single-element arrays
# are returned unchanged.
def bubble_sort_by(array)
  last = array.length - 1
  loop do
    swapped = false
    (0...last).each do |i|
      next unless yield(array[i], array[i + 1]) > 0

      # Swap the array slots themselves; reassigning block-local variables
      # would leave the array untouched.
      array[i], array[i + 1] = array[i + 1], array[i]
      swapped = true
    end
    # After each pass the largest remaining element has settled at +last+.
    last -= 1
    break unless swapped
  end
  array
end
# Example call: compare two words by their length.
bubble_sort_by(%w[hi hello hey]) { |left, right| left.length - right.length }