我的 Python 代码非常简单。我运行一个 SQL 查询,得到某个文件每次提交时的 datetime 列值。最终我想得到按月汇总的提交次数,例如:
2017-05 | 2
2017-12 | 1
以此类推
# Goal (per the question text): count the commits of one file per month,
# starting from the commit datetimes returned by a SQL query.
filename = 'myFile'
# Parameterised query: the three %s placeholders are filled from the tuple
# passed to cursor.execute() below. Only the `date` column is selected.
query = ("select date "
"from mytable where branch like %s and repository like %s "
"and filename like %s ")
# NOTE(review): churn_db_connection, branch and repository are defined outside
# this snippet.
cursor = churn_db_connection.get_connection_cursor()
cursor.execute(query, (branch, repository,filename,))
# Collect the first field of every returned row.
dt = []
# The parentheses around `date` do not unpack anything: `date` is the whole
# row, which is why it is indexed with [0] on the next line.
# NOTE(review): the loop body below lost its indentation in this paste.
for (date) in cursor:
dt.append(date[0])
cursor.close()
# One-column DataFrame whose column is named after `filename` ('myFile').
# No index is supplied here, and the traceback further down reports the
# resulting index type as 'RangeIndex'.
file1series = pd.DataFrame({
filename: dt
})
# Convert the column to datetime; the dtypes printout below reports
# datetime64[ns] for it.
file1series[filename] = pd.to_datetime(file1series[filename])
print(file1series.dtypes)
print(file1series.head())
# Why this fails: pd.Grouper(freq=...) is given no key, so it tries to bin on
# the Series' index. That index is still the RangeIndex, not a DatetimeIndex,
# hence the TypeError shown in the traceback.
# NOTE(review): grouping the DataFrame with pd.Grouper(key=filename, freq="M"),
# or setting the datetime column as the index first, should avoid this;
# confirm against the pandas Grouper documentation.
g = file1series[filename].groupby(pd.Grouper(freq="M")) # This line throws the error
print(g.head())
---- 打印语句的输出 ----
myFile datetime64[ns]
dtype: object
myFile
0 2018-04-06 14:00:08
1 2017-03-15 00:51:20
2 2017-12-07 05:38:22
3 2017-05-12 16:40:05
4 2017-05-03 10:13:47
这是错误信息:
TypeError Traceback (most recent call last)
<ipython-input-70-6d36509ffb53> in <module>
43 print(file1series.dtypes)
44 print(file1series.head())
---> 45 g = file1series[filename].groupby(pd.Grouper(freq="M"))
46 print(g.head())
~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/generic.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, **kwargs)
6663 return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
6664 sort=sort, group_keys=group_keys, squeeze=squeeze,
-> 6665 observed=observed, **kwargs)
6666
6667 def asfreq(self, freq, method=None, how=None, normalize=False,
~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in groupby(obj, by, **kwds)
2150 raise TypeError('invalid type: %s' % type(obj))
2151
-> 2152 return klass(obj, by, **kwds)
2153
2154
~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, **kwargs)
597 sort=sort,
598 observed=observed,
--> 599 mutated=self.mutated)
600
601 self.obj = obj
~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _get_grouper(obj, key, axis, level, sort, observed, mutated, validate)
3187 # a passed-in Grouper, directly convert
3188 if isinstance(key, Grouper):
-> 3189 binner, grouper, obj = key._get_grouper(obj, validate=False)
3190 if key.key is None:
3191 return grouper, [], obj
~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/resample.py in _get_grouper(self, obj, validate)
1278 def _get_grouper(self, obj, validate=True):
1279 # create the resampler and return our binner
-> 1280 r = self._get_resampler(obj)
1281 r._set_binner()
1282 return r.binner, r.grouper, r.obj
~/Documents/proj/venv/lib/python3.7/site-packages/pandas/core/resample.py in _get_resampler(self, obj, kind)
1274 raise TypeError("Only valid with DatetimeIndex, "
1275 "TimedeltaIndex or PeriodIndex, "
-> 1276 "but got an instance of %r" % type(ax).__name__)
1277
1278 def _get_grouper(self, obj, validate=True):
TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'
我已经查阅过 Stack Overflow 上的相关页面并尝试了各种方法,但此错误仍然存在。可能是我遗漏了某个很简单的地方。请指教。
答案 0 :(得分:1)
# Build a small demo frame from the sample timestamps and derive a 'YYYY-MM'
# label for every row, so that rows can afterwards be counted per month with
# an ordinary column groupby (no DatetimeIndex required).
df = pd.DataFrame({
    'Date': [
        '2018-04-06 14:00:08', '2017-03-15 00:51:20',
        '2017-12-07 05:38:22', '2017-05-12 16:40:05',
        '2017-05-03 10:13:47',
    ]
})
# Parse the strings with pd.to_datetime instead of astype('datetime64'):
# pandas 2.x rejects the unit-less 'datetime64' dtype string with a TypeError.
df['Date'] = pd.to_datetime(df['Date'])
# Zero-padded 'YYYY-MM' strings sort chronologically, so grouping on this
# column lists the months in calendar order.
df['Month'] = df['Date'].dt.strftime('%Y-%m')
>>> df
Date Month
0 2018-04-06 14:00:08 2018-04
1 2017-03-15 00:51:20 2017-03
2 2017-12-07 05:38:22 2017-12
3 2017-05-12 16:40:05 2017-05
4 2017-05-03 10:13:47 2017-05
按月统计条目数:
>>> df.groupby('Month').size().to_frame(name='Count')
Month Count
2017-03 1
2017-05 2
2017-12 1
2018-04 1