使用 Python pandas 按天对二维数组进行分组的最佳方法是什么?我想把二维数组按天分组并得到每天的总和,结果也应该是二维数组。
import numpy as np
import pandas as pd
import itertools
from datetime import datetime
# Create 72 hours of pseudo-data: one 3 x 4 array of ones per hourly
# timestamp (72 samples in total).
data = np.ones((72, 3, 4))
# Hourly timestamps starting at 2008-07-01 00:00. An explicit offset object is
# used instead of the 'H' string alias, which pandas 2.2 deprecated.
t = pd.date_range(datetime(2008, 7, 1), freq=pd.offsets.Hour(), periods=72)
# Iterating a 3-D array yields its 2-D slices along axis 0, so each Series
# element is one (3, 4) array keyed by its timestamp.
s = pd.Series(list(data), index=t)
2008-07-01 14:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 15:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 16:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 17:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 18:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 19:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 20:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 21:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 22:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-01 23:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 00:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 01:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 02:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 03:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 04:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 05:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
...
2008-07-02 18:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 19:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 20:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 21:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 22:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
2008-07-02 23:00:00 [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [...
结果 - >
2008-07-01 [[24.0, 24.0, 24.0, 24.0], [24.0, 24.0, 24.0, 24.0], [...
2008-07-02 [[24.0, 24.0, 24.0, 24.0], [24.0, 24.0, 24.0, 24.0], [...
还是说使用 for 循环才是正确的做法?我不太喜欢在 Python 中使用循环。
答案 0 :(得分:1)
对您的数据应用 groupby + np.sum 操作。
# Bucket the timestamp-indexed entries into one-day bins, then reduce each
# bin with np.sum along axis 0.
daily_bins = pd.Grouper(freq='1D')
s.groupby(daily_bins).apply(np.sum, axis=0)
2008-07-01 [[24.0, 24.0, 24.0, 24.0], [24.0, 24.0, 24.0, ...
2008-07-02 [[24.0, 24.0, 24.0, 24.0], [24.0, 24.0, 24.0, ...
2008-07-03 [[24.0, 24.0, 24.0, 24.0], [24.0, 24.0, 24.0, ...
Freq: D, dtype: object