我有2个数据帧如下:
import pandas as pd
# Interval table: one row per interval, addressed by its positional label 0..2.
df = pd.DataFrame({
    'Start': [1, 4, 7],
    'End': [2, 15, 22],
    'Duration': [1, 11, 15],
})
# Data table: the 'Index' column holds the label of the `df` row each record
# refers to; a label may appear more than once.
df2 = pd.DataFrame({
    'Index': [0, 1, 1, 2, 2],
    'Data1': [2, 2, 8, 1, 1],
    'Data2': [3, 2, 9, 1, 1],
})
df
Out[6]:
Start End Duration
0 1 2 1
1 4 15 11
2 7 22 15
df2
Out[7]:
Index Data1 Data2
0 0 2 3
1 1 2 2
2 1 8 9
3 2 1 1
4 2 1 1
我想要做的是使用以下标准创建第3个数据帧df3:
查看 df2 的 Index 列:
1)如果某个 Index 值在 df2 中只出现一次,则取 df 中对应行(即 df.loc[Index])的 Start、End 和 Duration,附加到 df2 的该行之后,得到 df3:
df3
Out[7]:
Index Data1 Data2 Start End Duration
0 2 3 1 2 1
2)如果某个 Index 值在 df2 中出现多次(例如 Index = 1),则第一行取 df 中对应行的 Start(即 4),End 为 Start 加 5(即 9),Duration 为 5:
df3
Out[7]:
Index Data1 Data2 Start End Duration
0 2 3 1 2 1
1 2 2 4 9 5
对于同一 Index 的下一行,Start 取上一行的 End 加 1(即 10),End 为该 Start 加 5(即 15),Duration 仍为 5:
df3
Out[7]:
Index Data1 Data2 Start End Duration
0 2 3 1 2 1
1 2 2 4 9 5
1 8 9 10 15 5
最终的数据帧df3应为:
df3
Out[7]:
Index Data1 Data2 Start End Duration
0 2 3 1 2 1
1 2 2 4 9 5
1 8 9 10 15 5
2 1 1 7 12 5
2 1 1 13 18 5
这是我的尝试:
import pandas as pd
import numpy as np
# Build df3: every df2 row paired with the df1 row whose label equals
# df2['Index'].
#   * If an Index value occurs once in df2, the df1 Start/End/Duration are
#     copied unchanged.
#   * If an Index value occurs several times, the rows are laid out as
#     consecutive slots: the first slot starts at the df1 Start, each slot
#     ends SLOT_LENGTH after its own start, and the next slot starts one past
#     the previous slot's End. Duration is SLOT_LENGTH for every such row.
#
# The previous implementation grew df3 with DataFrame.append inside a loop;
# that method was removed in pandas 2.0, and it also built the temporary
# frames with `columns=[frame.keys()]`, which yields MultiIndex columns.
df1 = pd.DataFrame(
    [[1, 2, 1], [4, 15, 11], [7, 22, 15]],
    columns=['Start', 'End', 'Duration'],
)
df2 = pd.DataFrame(
    [[0, 2, 3], [1, 2, 2], [1, 8, 9], [2, 1, 1], [2, 1, 1]],
    columns=['Index', 'Data1', 'Data2'],
)
# Kept from the original script so df2 still carries 'Index' as row labels.
df2.index = df2['Index']

# Length (End - Start, and reported Duration) of each slot of a shared Index.
SLOT_LENGTH = 5

# Drop the 'Index'-named row labels first: with both an index level and a
# column called 'Index', `on='Index'` would be ambiguous. The inner join keeps
# df2's row order and discards rows that have no matching df1 label.
df3 = (
    df2.reset_index(drop=True)
    .join(df1, on='Index', how='inner')
    .reset_index(drop=True)
)

# True for every row whose Index value appears more than once.
is_shared = df3.duplicated(subset='Index', keep=False)
# 0 for the first row of each Index value, 1 for the second, and so on.
slot_number = df3.groupby('Index').cumcount()

# Consecutive slots are SLOT_LENGTH + 1 apart (next Start = previous End + 1).
# slot_number is 0 for unique rows, so their Start is left unchanged.
slot_start = df3['Start'] + slot_number * (SLOT_LENGTH + 1)
df3['Start'] = slot_start
df3['End'] = df3['End'].mask(is_shared, slot_start + SLOT_LENGTH)
df3['Duration'] = df3['Duration'].mask(is_shared, SLOT_LENGTH)