Question

我正在使用 pandas 计算数据文件的某些统计信息，但遇到了一些错误。可以通过以下简单的示例代码复现：

import pandas as pd

# Nine-row sample frame: 'A' counts 1..9, 'B' cycles through 1, 2, 3,
# and 'C' holds the string labels 'a' / 'b'.
df = pd.DataFrame(
    {
        'A': [1, 2, 3, 4, 5, 6, 7, 8, 9],
        'B': [1, 2, 3, 1, 2, 3, 1, 2, 3],
        'C': ['a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b'],
    }
)

def testFun2(x):
    # Wrap the row count of x (x.shape[0]) in a DataFrame under the key 'xlen'.
    # NOTE(review): the dict holds only a scalar and no index is supplied;
    # pandas presumably rejects this with "ValueError: If using all scalar
    # values, you must pass an index" -- confirm. Wrapping the value in a list
    # ([x.shape[0]]) or passing index=[0] should avoid that.
    return pd.DataFrame({'xlen': x.shape[0]})

def testFun(x):
    # Per-group worker: for every distinct value in column 'A' of x, run
    # testFun2 on the matching rows and stack the results with pd.concat.
    # Reads columns 'A', 'B' and 'C' of x, so all three must be present.
    b = x['B']
    # NOTE(review): Python 2 print statement; this line is a SyntaxError on Python 3.
    print "b equals to {}".format(b) # This line prints okay
    c = x['C']  # c is never used afterwards
    out = pd.DataFrame()  # empty accumulator that each sub-result is concatenated onto
    for a in x['A'].unique():
        subx = x[x.A == a]  # rows of x whose 'A' equals the current value
        subxg = testFun2(subx)
        out = pd.concat([out, subxg])
    return out

# Run testFun once per ('B', 'C') group of df.
# NOTE(review): in the quoted traceback, testFun is reached from inside pandas'
# `_group_selection_context` block; presumably pandas is retrying after an
# earlier exception with the grouping columns removed, which would explain
# KeyError: 'B' -- confirm against the pandas groupby source.
df.groupby(['B', 'C']).apply(lambda x: testFun(x))

完整的错误输出如下：

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-21-979d23aa904c> in <module>()
     18     return out
     19 
---> 20 df.groupby(['B', 'C']).apply(lambda x: testFun(x))

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\groupby\groupby.pyc in apply(self, func, *args, **kwargs)
    928 
    929                 with _group_selection_context(self):
--> 930                     return self._python_apply_general(f)
    931 
    932         return result

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\groupby\groupby.pyc in _python_apply_general(self, f)
    934     def _python_apply_general(self, f):
    935         keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 936                                                    self.axis)
    937 
    938         return self._wrap_applied_output(

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\groupby\groupby.pyc in apply(self, f, data, axis)
   2271             # group might be modified
   2272             group_axes = _get_axes(group)
-> 2273             res = f(group)
   2274             if not _is_indexed_like(res, group_axes):
   2275                 mutated = True

<ipython-input-21-979d23aa904c> in <lambda>(x)
     18     return out
     19 
---> 20 df.groupby(['B', 'C']).apply(lambda x: testFun(x))

<ipython-input-21-979d23aa904c> in testFun(x)
      9 
     10 def testFun(x):
---> 11     b = x['B']
     12     c = x['C']
     13     out = pd.DataFrame()

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689 
   2690     def _getitem_column(self, key):

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696 
   2697         # duplicate columns & possible reduce dimensionality

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
   2487         res = cache.get(item)
   2488         if res is None:
-> 2489             values = self._data.get(item)
   2490             res = self._box_item_values(item, values)
   2491             cache[item] = res

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\internals.pyc in get(self, item, fastpath)
   4113 
   4114             if not isna(item):
-> 4115                 loc = self.items.get_loc(item)
   4116             else:
   4117                 indexer = np.arange(len(self.items))[isna(self.items)]

C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\indexes\base.pyc in get_loc(self, key, method, tolerance)
   3078                 return self._engine.get_loc(key)
   3079             except KeyError:
-> 3080                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   3081 
   3082         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'B'

但是，我发现如果将testFun2更改为更简单的内容，例如：

def testFun2(x):
    # Simplified stand-in used for comparison: ignores x and returns the constant 1.
    return 1

这样就不会发生错误。这让我非常困惑——testFun2 与 b = x['B'] 这一行无关，对吗？为什么一开始会出现这个错误？谢谢！

pandas groupby 出现 KeyError

0 个答案: