我有一个像下面给出的pandas数据框
override func viewDidAppear(_ animated: Bool) {
super.viewDidAppear(animated)
AppUtility.lockOrientation(.portrait)
// Or to rotate and lock
// AppUtility.lockOrientation(.portrait, andRotateTo: .portrait)
}
override func viewWillDisappear(_ animated: Bool) {
super.viewWillDisappear(animated)
// Don't forget to reset when view is being removed
AppUtility.lockOrientation(.all)
}
我想要
start_time end_time value
2017-01-09 21:49:55 2017-01-09 21:55:41 150.0
2017-01-09 21:55:41 2017-01-09 21:58:46 4.0
2017-01-09 22:00:55 2017-01-09 23:13:00 144.0
由于前两行是连续事件,我将其加入一行并添加了它们的值。 任何建议如何进行。
答案 0 :(得分:1)
假设您的数据框已按时间排序
from datetime import datetime
import pandas as pd
'''
start_time end_time value
2017-01-09 21:49:55 2017-01-09 21:55:41 150.0
2017-01-09 21:55:41 2017-01-09 21:58:46 4.0
2017-01-09 22:00:55 2017-01-09 23:13:00 144.0
'''
# your dataframe
df = pd.DataFrame({'start_time': [datetime(2017,1,9,21,49,55), datetime(2017,1,9,21,55,41),datetime(2017,1,9,22,00,55)], \
'end_time': [datetime(2017,1,9,21,55,41), datetime(2017,1,9,21,58,46),datetime(2017,1,9,23,13,00)], \
'value': [150.0, 4.0, 144.0]})
获取连续时间范围的第一个start_time
,以及连续时间范围的最新end_time
:
df['start_time_'] = df['start_time'].loc[df['end_time'].shift(1) != df['start_time']]
df['end_time_'] = df['end_time'].loc[df['end_time'] != df['start_time'].shift(-1)]
print(df)
现在df
如下所示:
start_time end_time value start_time_ end_time_
0 2017-01-09 21:49:55 2017-01-09 21:55:41 150.0 2017-01-09 21:49:55 NaT
1 2017-01-09 21:55:41 2017-01-09 21:58:46 4.0 NaT 2017-01-09 21:58:46
2 2017-01-09 22:00:55 2017-01-09 23:13:00 144.0 2017-01-09 22:00:55 2017-01-09 23:13:00
然后填写NA值:
df['start_time_'].fillna(method='ffill',inplace=True)
df['end_time_'].fillna(method='bfill',inplace=True)
使用start_time_
,end_time_
列来替换start_time
,end_time
列。并删除start_time_
,end_time_
列:
df['start_time'] = df['start_time_']
df['end_time'] = df['end_time_']
del df['start_time_']
del df['end_time_']
然后分组和总和:
df = df.groupby(['start_time', 'end_time'], as_index=False).sum()
print(df)
结果如下:
start_time end_time value
0 2017-01-09 21:49:55 2017-01-09 21:58:46 154.0
1 2017-01-09 22:00:55 2017-01-09 23:13:00 144.0
答案 1 :(得分:0)
可能不是最优雅的方式,但它有点有效:
df = pd.DataFrame({"start":[1,2,4], "end":[2,3,5], 'val':[1,1,1]})
tmp = df.set_index('end', drop=0).join(df.set_index('start', drop=0), how='left', rsuffix='_r')
tmp.val = tmp.val_r.fillna(0) + tmp.val
df = tmp[~tmp.end.isin(tmp.end_r)][['start', 'end', 'val']]
df.index = range(len(df))