Pandas GroupBy 对象在传入自定义函数时行为不一致

时间:2016-06-14 12:34:53

标签: python python-3.x pandas

我正在使用数据集进行以下测试

# Build the demo frame: eight labelled rows with random values in C and D.
# NOTE: assumes `import pandas` and `import numpy as np` were run earlier.
df = pandas.DataFrame(
    {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    }
)
# Stack the frame on top of itself, then replace the repeated index labels
# with a fresh 0..n-1 index.
df = pandas.concat([df, df])
df.reset_index(drop=True, inplace=True)
# One consecutive calendar day per row, starting at 2000-01-01.
df['dates'] = pandas.date_range('1/1/2000', periods=len(df))
# Group on both label columns at once.
grouped = df.groupby(['A', 'B'], as_index=False)

现在我写了一个如下所示的自定义函数:
def knowType(x):
    """Print the runtime type of ``x``; nothing is returned."""
    received_type = type(x)
    print(received_type)

并像这样把它应用到分组对象 grouped 上:

# Use knowType as the aggregation function on the grouped object; knowType
# prints the type of every argument it is called with.
grouped.agg(knowType)

我得到了这样的标准输出(stdout):

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>

但是当我把这个函数改成下面这样时

def knowType(x):
    # Show which type the aggregation machinery passes in on this call.
    print(type(x))
    # This is the line the traceback below fails on: the DataFrame passed in
    # has no attribute `unique`.
    uniq = x.unique()
    # str.join accepts only string items, so this works for string-valued
    # input only.
    # NOTE(review): for the non-string columns (C, D, dates) this presumably
    # raises TypeError, which may be what makes agg() fall back to passing
    # whole DataFrames - confirm against the pandas version in use.
    print(",".join(uniq))

并以同样的方式调用它

# Same call as before, now with the redefined knowType; this is the statement
# the traceback below points at.
grouped.agg(knowType)

我收到以下错误:

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-41-5e12be303545> in <module>()
----> 1 grouped.agg(knowType)

/home/srinath/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
   3595     @Appender(SelectionMixin._agg_doc)
   3596     def aggregate(self, arg, *args, **kwargs):
-> 3597         return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
   3598 
   3599     agg = aggregate

/home/srinath/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
   3120             # grouper specific aggregations
   3121             if self.grouper.nkeys > 1:
-> 3122                 return self._python_agg_general(arg, *args, **kwargs)
   3123             else:
   3124 

/home/srinath/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py in _python_agg_general(self, func, *args, **kwargs)
    781 
    782         if len(output) == 0:
--> 783             return self._python_apply_general(f)
    784 
    785         if self.grouper._filter_empty_groups:

/home/srinath/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py in _python_apply_general(self, f)
    653     def _python_apply_general(self, f):
    654         keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 655                                                    self.axis)
    656 
    657         return self._wrap_applied_output(

/home/srinath/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py in apply(self, f, data, axis)
   1525             # group might be modified
   1526             group_axes = _get_axes(group)
-> 1527             res = f(group)
   1528             if not _is_indexed_like(res, group_axes):
   1529                 mutated = True

/home/srinath/anaconda3/lib/python3.5/site-packages/pandas/core/groupby.py in <lambda>(x)
    769     def _python_agg_general(self, func, *args, **kwargs):
    770         func = self._is_builtin_func(func)
--> 771         f = lambda x: func(x, *args, **kwargs)
    772 
    773         # iterate through "columns" ex exclusions to populate output dict

<ipython-input-40-e3b0c59fb896> in knowType(x)
     16 def knowType(x):
     17     print(type(x))
---> 18     uniq = x.unique()
     19     print(",".join(uniq))

/home/srinath/anaconda3/lib/python3.5/site-packages/pandas/core/generic.py in __getattr__(self, name)
   2670             if name in self._info_axis:
   2671                 return self[name]
-> 2672             return object.__getattribute__(self, name)
   2673 
   2674     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'unique'

有趣的是，stdout 显示类型是 DataFrame。这是为什么？我看过这个(this)链接，但没能弄明白其中发生了什么。有谁能帮帮我吗？谢谢

0 个答案:

没有答案