pandas 按日期分组时抛出错误:得到了一个 RangeIndex 实例

时间:2019-02-25 23:18:03

标签: python pandas pandas-groupby python-datetime

我的python代码非常简单。我正在运行一个SQL查询,该查询为我提供了提交文件时的datetime列值。最后,我想要的是按月汇总的提交计数,例如

2017-05 | 2
2017-12 | 1

以此类推

# Fetch the stored `date` values for one file and try to group them by month.
filename = 'myFile'
query = ("select date "
         "from mytable where branch like %s and repository like %s "
         "and filename like %s ")
cursor = churn_db_connection.get_connection_cursor()
# The three filter values are passed as query parameters, not formatted into the SQL text.
cursor.execute(query, (branch, repository,filename,))
dt = []
for (date) in cursor:
    # Only one column is selected, so element [0] of each fetched row is the date value.
    dt.append(date[0])
cursor.close()


# Build a one-column DataFrame whose column is named after the file.
# No index is supplied, so the frame gets the default RangeIndex (0, 1, 2, ...).
file1series = pd.DataFrame({
                filename: dt
                })
file1series[filename] = pd.to_datetime(file1series[filename])
print(file1series.dtypes)
print(file1series.head())
# pd.Grouper(freq=...) is given no `key`, so it bins on the Series' index; that index is a
# RangeIndex rather than a DatetimeIndex, which is what the TypeError below reports.
g = file1series[filename].groupby(pd.Grouper(freq="M")) # This line throws the error
print(g.head())

---- 打印语句的输出 ----

myFile    datetime64[ns]
dtype: object
  myFile
0 2018-04-06 14:00:08                                                                                     
1 2017-03-15 00:51:20                                                                                     
2 2017-12-07 05:38:22                                                                                     
3 2017-05-12 16:40:05                                                                                     
4 2017-05-03 10:13:47                                                                                     

这是错误

TypeError                                 Traceback (most recent call last)
<ipython-input-70-6d36509ffb53> in <module>
     43 print(file1series.dtypes)
     44 print(file1series.head())
---> 45 g = file1series[filename].groupby(pd.Grouper(freq="M"))
     46 print(g.head())

~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/generic.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, **kwargs)
   6663         return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
   6664                        sort=sort, group_keys=group_keys, squeeze=squeeze,
-> 6665                        observed=observed, **kwargs)
   6666 
   6667     def asfreq(self, freq, method=None, how=None, normalize=False,

~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in groupby(obj, by, **kwds)
   2150         raise TypeError('invalid type: %s' % type(obj))
   2151 
-> 2152     return klass(obj, by, **kwds)
   2153 
   2154 

~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, **kwargs)
    597                                                     sort=sort,
    598                                                     observed=observed,
--> 599                                                     mutated=self.mutated)
    600 
    601         self.obj = obj

~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _get_grouper(obj, key, axis, level, sort, observed, mutated, validate)
   3187     # a passed-in Grouper, directly convert
   3188     if isinstance(key, Grouper):
-> 3189         binner, grouper, obj = key._get_grouper(obj, validate=False)
   3190         if key.key is None:
   3191             return grouper, [], obj

~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/resample.py in _get_grouper(self, obj, validate)
   1278     def _get_grouper(self, obj, validate=True):
   1279         # create the resampler and return our binner
-> 1280         r = self._get_resampler(obj)
   1281         r._set_binner()
   1282         return r.binner, r.grouper, r.obj

~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/resample.py in _get_resampler(self, obj, kind)
   1274         raise TypeError("Only valid with DatetimeIndex, "
   1275                         "TimedeltaIndex or PeriodIndex, "
-> 1276                         "but got an instance of %r" % type(ax).__name__)
   1277 
   1278     def _get_grouper(self, obj, validate=True):

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'

我已经查阅过 Stack Overflow 上的相关页面并尝试了所有方法,但此错误仍然存在。可能是我遗漏了某个简单的地方。请指教。

1 个答案:

答案 0 :(得分:1)

# Create DataFrame and Month column
df = pd.DataFrame({
    'Date': ['2018-04-06 14:00:08', '2017-03-15 00:51:20',
             '2017-12-07 05:38:22', '2017-05-12 16:40:05',
             '2017-05-03 10:13:47']
})
# Parse the timestamp strings with pd.to_datetime. The unit-less
# astype('datetime64') cast raises TypeError on pandas >= 2.0, while
# pd.to_datetime works on old and new versions alike.
df['Date'] = pd.to_datetime(df['Date'])
# 'YYYY-MM' label per row; grouping on this column gives one bucket per calendar month.
df['Month'] = df['Date'].dt.strftime('%Y-%m')

>>> df
    Date                    Month
0   2018-04-06 14:00:08     2018-04
1   2017-03-15 00:51:20     2017-03
2   2017-12-07 05:38:22     2017-12
3   2017-05-12 16:40:05     2017-05
4   2017-05-03 10:13:47     2017-05

按月统计条目数:

>>> df.groupby('Month').size().to_frame(name='Count')

Month       Count   
2017-03     1
2017-05     2
2017-12     1
2018-04     1