在索引列上进行 join 时出现 KeyError
# Import libraries
import pandas as pd
import numpy as np
# Open and load all files indexed by 'ISI_LOC'
# index_col='ISI_LOC' makes ISI_LOC the row index of each frame, not a regular column.
df_all = pd.read_csv('AUTHORS.csv', index_col='ISI_LOC', dtype={'ISI_LOC':str, 'POSITION':int}, engine='c', low_memory=False)
df_addresses = pd.read_csv('ADDRESSES.csv', index_col='ISI_LOC', dtype={'ISI_LOC': str, 'POSITION':int, 'Seg1':str }, engine='c', low_memory=False)
# There are more, but for the sake of brevity...
检查这些 DataFrame 可以看到，索引是字符串类型，符合预期。
# Goal: df_all.join([df_addresses, df_catagories, df_keywordsplus, df_articles])
# NOTE(review): on='ISI_LOC' tells join to look up 'ISI_LOC' among df_all's columns;
# ISI_LOC was loaded as the index (index_col), which is consistent with the reported
# KeyError: 'ISI_LOC'. Joining index-to-index would omit `on` -- confirm against the pandas docs.
df_all.join(df_addresses, on='ISI_LOC')
这会导致如下错误：
KeyError Traceback (most recent call last)
<ipython-input-17-35d37498b69e> in <module>()
1 # df_all.join([df_addresses, df_catagories, df_keywordsplus, df_articles])
----> 2 df_all.join(df_addresses, on='ISI_LOC')
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in join(self, other, on, how, lsuffix, rsuffix, sort)
3865 # For SparseDataFrame's benefit
3866 return self._join_compat(other, on=on, how=how, lsuffix=lsuffix,
-> 3867 rsuffix=rsuffix, sort=sort)
3868
3869 def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _join_compat(self, other, on, how, lsuffix, rsuffix, sort)
3879 return merge(self, other, left_on=on, how=how,
3880 left_index=on is None, right_index=True,
-> 3881 suffixes=(lsuffix, rsuffix), sort=sort)
3882 else:
3883 if on is not None:
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
36 right_on=right_on, left_index=left_index,
37 right_index=right_index, sort=sort, suffixes=suffixes,
---> 38 copy=copy)
39 return op.get_result()
40 if __debug__:
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)
182 (self.left_join_keys,
183 self.right_join_keys,
--> 184 self.join_names) = self._get_merge_keys()
185
186 def get_result(self):
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tools\merge.py in _get_merge_keys(self)
359 join_names.append(None)
360 else:
--> 361 left_keys.append(left[k].values)
362 join_names.append(k)
363 if isinstance(self.right.index, MultiIndex):
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1778 return self._getitem_multilevel(key)
1779 else:
-> 1780 return self._getitem_column(key)
1781
1782 def _getitem_column(self, key):
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1785 # get column
1786 if self.columns.is_unique:
-> 1787 return self._get_item_cache(key)
1788
1789 # duplicate columns & possible reduce dimensionaility
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1066 res = cache.get(item)
1067 if res is None:
-> 1068 values = self._data.get(item)
1069 res = self._box_item_values(item, values)
1070 cache[item] = res
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
2847
2848 if not isnull(item):
-> 2849 loc = self.items.get_loc(item)
2850 else:
2851 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Users\430010958\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key)
1400 loc : int if unique index, possibly slice or mask if not
1401 """
-> 1402 return self._engine.get_loc(_values_from_object(key))
1403
1404 def get_value(self, series, key):
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3807)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3687)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12310)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12261)()
KeyError: 'ISI_LOC'
是的，我可以使用其他方法。事实上，我已经用下面这种语法以一种比较笨拙的方式实现了，所以我知道数据格式是正确的：
# NOTE(review): in pd.concat, `keys` labels the concatenated pieces in an outer index level;
# it does not name a column or index to align on -- confirm this is the intended use.
df_catagories = pd.concat([df_catagories, df_keywordsplus], keys='ISI_LOC')
这样可行，但不是我想要的方式。我的 join 语句中缺少了什么？我试过 `how=` 等其他参数，但都没有成功。