Python版本:3.6
pandas版本:0.21.1
我如何从下面的 df_raw:
print(df_raw)
device_id temp_a temp_b temp_c
0 0 0.2 0.8 0.6
1 0 0.1 0.9 0.4
2 1 0.3 0.7 0.2
3 2 0.5 0.5 0.1
4 2 0.1 0.9 0.4
5 2 0.7 0.3 0.9
得到下面的 df_except2:
print(df_except2)
device_id temp_a temp_b temp_c temp_a_1 temp_b_1 temp_c_1 temp_a_2 \
0 0 0.2 0.8 0.6 0.1 0.9 0.4 NaN
1 1 0.3 0.7 0.2 NaN NaN NaN NaN
2 2 0.5 0.5 0.1 0.1 0.9 0.4 0.7
temp_b_2 temp_c_2
0 NaN NaN
1 NaN NaN
2 0.3 0.9
数据代码:
# Sample input in long format: a device_id may appear on several rows, and
# each row carries one set of temp_a / temp_b / temp_c readings.
raw_columns = ['device_id', 'temp_a', 'temp_b', 'temp_c']
raw_rows = [
    ('0', 0.2, 0.8, 0.6),
    ('0', 0.1, 0.9, 0.4),
    ('1', 0.3, 0.7, 0.2),
    ('2', 0.5, 0.5, 0.1),
    ('2', 0.1, 0.9, 0.4),
    ('2', 0.7, 0.3, 0.9),
]
df_raw = pd.DataFrame(raw_rows, columns=raw_columns)
print(df_raw)
# Hand-built expected result in wide format: one row per device_id. The first
# reading keeps the plain column names; later readings are suffixed _1, _2.
# None marks readings a device does not have (printed as NaN).
df_except = pd.DataFrame(
    {
        'device_id': ['0', '1', '2'],
        'temp_a': [0.2, 0.3, 0.5],
        'temp_b': [0.8, 0.7, 0.5],
        'temp_c': [0.6, 0.2, 0.1],
        'temp_a_1': [0.1, None, 0.1],
        'temp_b_1': [0.9, None, 0.9],
        'temp_c_1': [0.4, None, 0.4],
        'temp_a_2': [None, None, 0.7],
        'temp_b_2': [None, None, 0.3],
        'temp_c_2': [None, None, 0.9],
    }
)

# Fix the column order explicitly: device_id, the base readings, then every
# suffixed group in turn.
base_columns = ['temp_a', 'temp_b', 'temp_c']
ordered_columns = ['device_id'] + base_columns + [
    '{}_{}'.format(column, occurrence)
    for occurrence in (1, 2)
    for column in base_columns
]
df_except2 = df_except[ordered_columns]
print(df_except2)
注意:
1. 每个 device_id 对应的行数是未知的。
我参考以下答案:
Pandas Dataframe - How to combine multiple rows to one
但这个答案只能处理一列。
答案 0 :(得分:1)
使用:
# Number the rows inside each device_id group: 0 for the first reading,
# 1 for the second, and so on.
g = df_raw.groupby('device_id').cumcount()

# Index by (device_id, occurrence number), then move the occurrence level
# into the columns so that every device ends up on a single row.
df = df_raw.set_index(['device_id', g])
df = df.unstack()
# Order the columns by occurrence number (second column level) so that all
# readings of the same occurrence sit next to each other.
df = df.sort_index(axis=1, level=1)

# Flatten the two-level column labels: occurrence 0 keeps the plain name,
# later occurrences become "<name>_<occurrence>".
flat_names = []
for name, occurrence in df.columns:
    if occurrence == 0:
        flat_names.append('{}'.format(name))
    else:
        flat_names.append('{}_{}'.format(name, occurrence))
df.columns = flat_names

# Turn device_id back from the index into a regular column.
df = df.reset_index()
print (df)
device_id temp_a temp_b temp_c temp_a_1 temp_b_1 temp_c_1 temp_a_2 \
0 0 0.2 0.8 0.6 0.1 0.9 0.4 NaN
1 1 0.3 0.7 0.2 NaN NaN NaN NaN
2 2 0.5 0.5 0.1 0.1 0.9 0.4 0.7
temp_b_2 temp_c_2
0 NaN NaN
1 NaN NaN
2 0.3 0.9
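**解释**:
1. 先用 cumcount 按列 device_id 对每组内的行进行编号,得到系列 g
2. 用 set_index 把列 device_id 和系列 g 设为 MultiIndex
3. 用 unstack 重塑数据,把 g 这一级转到列上
4. 用 sort_index 按列 MultiIndex 的第二级排序
5. 用列表推导式把列的 MultiIndex 展平为单层列名
6. 最后用 reset_index 把索引中的 device_id 还原为普通列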
答案 1 :(得分:0)
代码:
import numpy as np
# Reshape df_raw from long to wide format: one output row per device_id.
# The first reading of a device keeps the original column names; its i-th
# later reading is appended as extra columns suffixed "_i" (temp_a_1, ...).
# The result is stored in append_df and printed.

# Distinct device ids in the order np.unique returns them.
device_id_list = list(np.unique(df_raw['device_id'].tolist()))

wide_rows = []
for device_id in device_id_list:
    # Select this device's rows with a boolean mask rather than a
    # string-built query(): no quoting problems, and it also works when
    # device_id values are not strings.
    tmp_df = df_raw[df_raw['device_id'] == device_id]

    # First reading: kept as-is, including the device_id column.
    pieces = [tmp_df.iloc[[0]].reset_index(drop=True)]
    # Every later reading: drop device_id and suffix the remaining columns.
    for i in range(1, len(tmp_df)):
        next_raw = tmp_df.iloc[[i]].drop(columns=['device_id']).reset_index(drop=True)
        next_raw.columns = ['{}_{}'.format(old_name, i) for old_name in next_raw.columns]
        pieces.append(next_raw)

    # Side-by-side concat works because every piece is a one-row frame
    # whose index was reset to 0.
    wide_rows.append(pd.concat(pieces, axis=1))

if wide_rows:
    # The device with the most readings has the full column list in the
    # desired order; select by it so the column order does not depend on
    # how concat orders the union of columns.
    widest_columns = max(wide_rows, key=lambda row: row.shape[1]).columns
    # Concatenate once instead of growing a DataFrame inside the loop.
    append_df = pd.concat(wide_rows, ignore_index=True)[widest_columns]
else:
    # Empty input: same result as before, an empty DataFrame.
    append_df = pd.DataFrame()
print(append_df)
输出:
device_id temp_a temp_b temp_c temp_a_1 temp_b_1 temp_c_1 temp_a_2 \
0 0 0.2 0.8 0.6 0.1 0.9 0.4 NaN
1 1 0.3 0.7 0.2 NaN NaN NaN NaN
2 2 0.5 0.5 0.1 0.1 0.9 0.4 0.7
temp_b_2 temp_c_2
0 NaN NaN
1 NaN NaN
2 0.3 0.9
答案 2 :(得分:0)
# Same sample data as in the question: several readings per device_id.
readings = {
    'device_id': ['0', '0', '1', '2', '2', '2'],
    'temp_a': [0.2, 0.1, 0.3, 0.5, 0.1, 0.7],
    'temp_b': [0.8, 0.9, 0.7, 0.5, 0.9, 0.3],
    'temp_c': [0.6, 0.4, 0.2, 0.1, 0.4, 0.9],
}
df = pd.DataFrame(readings)

# Every column except the grouping key is a value column to spread out.
cols_of_interest = df.columns.drop('device_id')

# Label each row with its 1-based position inside its device_id group
# ("C_1", "C_2", ...); these labels become the second column level.
occurrence = df.groupby("device_id").cumcount() + 1
df["C"] = "C_" + occurrence.astype(str)

# One row per device_id and one (value column, label) column per reading.
# The result is not assigned; it is only displayed when run interactively.
df.pivot_table(values=cols_of_interest, index="device_id", columns="C")
输出:
temp_a temp_b temp_c
C C_1 C_2 C_3 C_1 C_2 C_3 C_1 C_2 C_3
device_id
0 0.2 0.1 NaN 0.8 0.9 NaN 0.6 0.4 NaN
1 0.3 NaN NaN 0.7 NaN NaN 0.2 NaN NaN
2 0.5 0.1 0.7 0.5 0.9 0.3 0.1 0.4 0.9