Pandas 在索引列上执行 join 时引发 KeyError

时间:2015-03-21 16:22:52

标签: python join pandas merge concat

在索引列上执行 join 时引发 KeyError
# Import libraries 
import pandas as pd
import numpy as np

# Open and load all files indexed by 'ISI_LOC'
df_all = pd.read_csv('AUTHORS.csv', index_col='ISI_LOC', dtype={'ISI_LOC':str, 'POSITION':int}, engine='c', low_memory=False)
df_addresses = pd.read_csv('ADDRESSES.csv', index_col='ISI_LOC', dtype={'ISI_LOC': str, 'POSITION':int, 'Seg1':str }, engine='c', low_memory=False) 
# There are more, but for the sake of brevity...

检查这些 DataFrame 可以看到,索引是字符串类型,符合预期。

# Goal: df_all.join([df_addresses, df_catagories, df_keywordsplus, df_articles])
df_all.join(df_addresses, on='ISI_LOC')

结果抛出如下错误:

KeyError                                  Traceback (most recent call last)
<ipython-input-17-35d37498b69e> in <module>()
      1 # df_all.join([df_addresses, df_catagories, df_keywordsplus, df_articles])
----> 2 df_all.join(df_addresses, on='ISI_LOC')

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in join(self, other, on, how, lsuffix, rsuffix, sort)
   3865         # For SparseDataFrame's benefit
   3866         return self._join_compat(other, on=on, how=how, lsuffix=lsuffix,
-> 3867                                  rsuffix=rsuffix, sort=sort)
   3868 
   3869     def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _join_compat(self, other, on, how, lsuffix, rsuffix, sort)
   3879             return merge(self, other, left_on=on, how=how,
   3880                          left_index=on is None, right_index=True,
-> 3881                          suffixes=(lsuffix, rsuffix), sort=sort)
   3882         else:
   3883             if on is not None:

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
     36                          right_on=right_on, left_index=left_index,
     37                          right_index=right_index, sort=sort, suffixes=suffixes,
---> 38                          copy=copy)
     39     return op.get_result()
     40 if __debug__:

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)
    182         (self.left_join_keys,
    183          self.right_join_keys,
--> 184          self.join_names) = self._get_merge_keys()
    185 
    186     def get_result(self):

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in _get_merge_keys(self)
    359                     join_names.append(None)
    360                 else:
--> 361                     left_keys.append(left[k].values)
    362                     join_names.append(k)
    363             if isinstance(self.right.index, MultiIndex):

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   1778             return self._getitem_multilevel(key)
   1779         else:
-> 1780             return self._getitem_column(key)
   1781 
   1782     def _getitem_column(self, key):

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
   1785         # get column
   1786         if self.columns.is_unique:
-> 1787             return self._get_item_cache(key)
   1788 
   1789         # duplicate columns & possible reduce dimensionaility

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
   1066         res = cache.get(item)
   1067         if res is None:
-> 1068             values = self._data.get(item)
   1069             res = self._box_item_values(item, values)
   1070             cache[item] = res

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
   2847 
   2848             if not isnull(item):
-> 2849                 loc = self.items.get_loc(item)
   2850             else:
   2851                 indexer = np.arange(len(self.items))[isnull(self.items)]

C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key)
   1400         loc : int if unique index, possibly slice or mask if not
   1401         """
-> 1402         return self._engine.get_loc(_values_from_object(key))
   1403 
   1404     def get_value(self, series, key):

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3807)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3687)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12310)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12261)()

KeyError: 'ISI_LOC'

是的,我可以改用其他方法。事实上,我已经用下面这种写法以一种相当迂回的方式实现了目的,所以我知道数据格式是正确的:

df_catagories = pd.concat([df_catagories, df_keywordsplus], keys='ISI_LOC')

这样做可行,但不是我想要的方式。我的 join 语句中遗漏了什么?我尝试过 'how=' 以及其他参数,但都没有成功。

0 个答案:

没有答案