我试图合并两个长度不同的数据帧(称为 df1 和 df2),这两个数据帧都以日期作为索引。两者中较长的数据帧(df1)包含了较短的数据帧(df2)中列出的所有日期。我尝试使用以下命令将它们合并在一起:merged = df2.merge(df1, on='Date')
但是执行这条命令时出现了以下错误,我看不懂这些错误信息。
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-47-e8d3e1ec920d> in <module>()
----> 1 merged = df2.merge(df1, on='Date')
/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
3630 left_on=left_on, right_on=right_on,
3631 left_index=left_index, right_index=right_index, sort=sort,
-> 3632 suffixes=suffixes, copy=copy)
3633
3634 #----------------------------------------------------------------------
/usr/lib/python2.7/dist-packages/pandas/tools/merge.pyc in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
37 right_on=right_on, left_index=left_index,
38 right_index=right_index, sort=sort, suffixes=suffixes,
---> 39 copy=copy)
40 return op.get_result()
41 if __debug__:
/usr/lib/python2.7/dist-packages/pandas/tools/merge.pyc in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)
181 (self.left_join_keys,
182 self.right_join_keys,
--> 183 self.join_names) = self._get_merge_keys()
184
185 def get_result(self):
/usr/lib/python2.7/dist-packages/pandas/tools/merge.pyc in _get_merge_keys(self)
324 else:
325 if not is_rkey(rk):
--> 326 right_keys.append(right[rk].values)
327 if lk == rk:
328 # avoid key upcast in corner case (length-0)
/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in __getitem__(self, key)
1656 return self._getitem_multilevel(key)
1657 else:
-> 1658 return self._getitem_column(key)
1659
1660 def _getitem_column(self, key):
/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in _getitem_column(self, key)
1663 # get column
1664 if self.columns.is_unique:
-> 1665 return self._get_item_cache(key)
1666
1667 # duplicate columns & possible reduce dimensionaility
/usr/lib/python2.7/dist-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
1003 res = cache.get(item)
1004 if res is None:
-> 1005 values = self._data.get(item)
1006 res = self._box_item_values(item, values)
1007 cache[item] = res
/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in get(self, item)
2872 return self.get_for_nan_indexer(indexer)
2873
-> 2874 _, block = self._find_block(item)
2875 return block.get(item)
2876 else:
/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in _find_block(self, item)
3184
3185 def _find_block(self, item):
-> 3186 self._check_have(item)
3187 for i, block in enumerate(self.blocks):
3188 if item in block:
/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in _check_have(self, item)
3191 def _check_have(self, item):
3192 if item not in self.items:
-> 3193 raise KeyError('no item named %s' % com.pprint_thing(item))
3194
3195 def reindex_axis(self, new_axis, indexer=None, method=None, axis=0,
KeyError: u'no item named Date'
我还尝试去掉 on='Date',因为两者都已按日期建立索引,但结果似乎相同。有人知道我可能是哪里出错了吗?
答案 0 :(得分:4)
出错的原因是:'Date' 是这两个数据帧的索引而不是普通的列,而从回溯信息可以看到,merge 的 on= 参数是按列名去查找 'Date' 的,因此抛出了 KeyError。我认为使用 join 是最自然的做法,因为默认情况下它按索引进行合并。如下所示:
merged = df2.join(df1,rsuffix='_y')
这里添加 rsuffix='_y',是因为两个数据帧中存在同名的列,需要用后缀加以区分。