我通过以下方式创建数据框:
# Per-weekday aggregation. String aliases are used instead of np.sum/np.mean:
# passing NumPy callables to agg() is deprecated in recent pandas and the
# aliases produce the same ('airing', 'sum'), ('uplift', 'sum'),
# ('uplift', 'mean') columns.
filtered_data.groupby('weekday').agg({'airing': 'sum', 'uplift': ['sum', 'mean']})
它会生成如下表格:
        airing uplift
           sum    sum      mean
weekday
1 11 20 1.818182
2 24 46 1.916667
...
我想在表格末尾再添加一行,显示每一列的总和。
提前致谢!
答案 0 :(得分:1)
您可以使用 `.loc` 索引器来实现:
# Append the per-column sums as a final row. The row is stored under the
# label 'Total' rather than len(df): when the index is not 0..n-1 (e.g. the
# weekday values 1..7) a positional label such as len(df) can coincide with an
# existing index value and overwrite that row instead of adding a new one.
df.loc['Total'] = df.sum()
答案 1 :(得分:1)
在这种情况下,您应该另外创建一个 Series 来保存汇总统计信息。如果只是为了显示,可以再把它与原表拼接(concat)起来。
# Overall statistics over the whole frame, kept separate from the
# per-weekday table: total airing, total uplift, mean uplift.
summary = pd.Series(
    [
        filtered_data['airing'].sum(),
        filtered_data['uplift'].sum(),
        filtered_data['uplift'].mean(),
    ],
    name='summary',
)
答案 2 :(得分:0)
为此,我创建了一个聚合工具,其行为类似于 SQL 中的 GROUPING SETS。只需提供用于分组的列和聚合函数,它就会返回聚合后的 DataFrame。
import itertools as it

import pandas as pd

try:
    # Public location of the testing helpers; pandas.util.testing is gone
    # from current pandas releases.
    from pandas.testing import assert_frame_equal
except ImportError:
    from pandas.util.testing import assert_frame_equal
def powerset(iterable):
    """Return an iterator over every subset of ``iterable``, as tuples.

    Subsets come out ordered by size, starting with the empty tuple:
    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    pool = tuple(iterable)
    # One combinations() iterator per subset size 0..len(pool), chained lazily.
    by_size = (it.combinations(pool, size) for size in range(len(pool) + 1))
    return it.chain.from_iterable(by_size)
def grouper(df, grpby, aggfunc):
    """Aggregate ``df`` over every non-redundant combination of ``grpby``.

    Works like SQL ``GROUPING SETS`` over the powerset of ``grpby``: for each
    combination of columns the frame is grouped, ``aggfunc`` is applied to the
    groupby object, and the per-combination results are stacked into one
    DataFrame. A ``grpby`` column that is not part of a combination is filled
    with the placeholder ``'(All)'`` in that combination's rows.

    A ``grpby`` column with a single distinct value cannot split the data, so
    only combinations that contain every such column are evaluated; leaving
    one out would repeat the same aggregates under a different label.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data. ``grouper`` works on a copy of it.
    grpby : sequence
        Column labels of ``df`` to build the groupings from.
    aggfunc : callable
        Called with a ``DataFrameGroupBy`` object; must return a DataFrame
        indexed by the group keys whose columns are the aggregates.

    Returns
    -------
    pandas.DataFrame
        The ``grpby`` columns followed by the aggregate columns, sorted by all
        of those columns, with a fresh 0..n-1 index.
    """
    grpby = list(grpby)  # accept any sequence; list ops are used below
    constant_cols = {col for col in grpby if len(df[col].unique()) == 1}
    # With no constant columns issubset() is always true, i.e. the full powerset.
    groupings = [grp for grp in powerset(grpby) if constant_cols.issubset(grp)]

    # Work on a copy so nothing done during aggregation reaches the caller's frame.
    workingdf = df.copy()
    finalcols = None
    pieces = []
    for grp in groupings:
        if grp:
            # Pass a list: newer pandas interprets a tuple as one single key.
            result = aggfunc(workingdf.groupby(list(grp))).reset_index()
        else:
            # Grand total: group on a constant key so aggfunc still receives a
            # groupby object, without adding a helper column to the frame.
            constant_key = pd.Series('', index=workingdf.index)
            result = aggfunc(workingdf.groupby(constant_key)).reset_index(drop=True)

        for col in grpby:
            if col not in result:  # column not part of this grouping
                result[col] = '(All)'

        if finalcols is None:
            # Column order is fixed once: grouping columns, then aggregates.
            finalcols = grpby + [col for col in result if col not in grpby]
        pieces.append(result[finalcols])

    final = pd.concat(pieces)
    final = final.sort_values(finalcols).reset_index(drop=True)
    return final
def agg(grpbyobj):
    """Example aggregation callback for ``grouper``.

    This is the place to choose which aggregate operation(s) to run and how
    the resulting column(s) are named in the final DataFrame. Here the
    ``'Total'`` column is summed per group and reported as ``'Total (n)'``.
    """
    totals = grpbyobj['Total'].sum()
    return totals.to_frame('Total (n)')
if __name__ == '__main__':
    # Demo input: 'Year' holds one value, 'Area' holds two.
    df = pd.DataFrame({
        'Area': ['a', 'a', 'b'],
        'Year': [2014, 2014, 2014],
        'Month': [1, 2, 3],
        'Total': [4, 5, 6],
    })
    final = grouper(df, grpby=['Area', 'Year'], aggfunc=agg)

    # Expected result: one '(All)' row for the whole year plus one row per area,
    # arranged in the same column order as the computed frame.
    expected = pd.DataFrame(
        {
            u'Year': [2014, 2014, 2014],
            u'Total (n)': [15, 9, 6],
            u'Area': [u'(All)', u'a', u'b'],
        },
        index=[0, 1, 2],
    )
    expected = expected[final.columns.tolist()]

    # check_names=True also compares the names of indexes and columns; a
    # mismatch surfaces as an AssertionError.
    assert_frame_equal(final, expected, check_names=True)