Question

我正在尝试使用 .combine_first() 合并两个 pandas DataFrame，但遇到了一个错误。

df3 = df1.combine_first(df2)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-20-187e7aafc9a6> in <module>
----> 1 df3 = df1.combine_first(df2)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in combine_first(self, other)
   5362             return expressions.where(mask, y_values, x_values)
   5363 
-> 5364         return self.combine(other, combiner, overwrite=False)
   5365 
   5366     @deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors',

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in combine(self, other, func, fill_value, overwrite)
   5286         # convert_objects just in case
   5287         return self._constructor(result, index=new_index,
-> 5288                                  columns=new_columns)
   5289 
   5290     def combine_first(self, other):

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
    390                                  dtype=dtype, copy=copy)
    391         elif isinstance(data, dict):
--> 392             mgr = init_dict(data, index, columns, dtype=dtype)
    393         elif isinstance(data, ma.MaskedArray):
    394             import numpy.ma.mrecords as mrecords

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_dict(data, index, columns, dtype)
    175     if columns is not None:
    176         from pandas.core.series import Series
--> 177         arrays = Series(data, index=columns, dtype=object)
    178         data_names = arrays.index
    179 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    205                 data = data._data
    206             elif isinstance(data, dict):
--> 207                 data, index = self._init_dict(data, index, dtype)
    208                 dtype = None
    209                 copy = False

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in _init_dict(self, data, index, dtype)
    306         # Now we just make sure the order is respected, if any
    307         if data and index is not None:
--> 308             s = s.reindex(index, copy=False)
    309         elif not PY36 and not isinstance(data, OrderedDict) and data:
    310             # Need the `and data` to avoid sorting Series(None, index=[...])

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in reindex(self, index, **kwargs)
   3732     @Appender(generic.NDFrame.reindex.__doc__)
   3733     def reindex(self, index=None, **kwargs):
-> 3734         return super(Series, self).reindex(index=index, **kwargs)
   3735 
   3736     def drop(self, labels=None, axis=0, index=None, columns=None,

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in reindex(self, *args, **kwargs)
   4354         # perform the reindex on the axes
   4355         return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356                                   fill_value, copy).__finalize__(self)
   4357 
   4358     def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   4367             ax = self._get_axis(a)
   4368             new_index, indexer = ax.reindex(labels, level=level, limit=limit,
-> 4369                                             tolerance=tolerance, method=method)
   4370 
   4371             axis = self._get_axis_number(a)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\multi.py in reindex(self, target, method, level, limit, tolerance)
   2225                                                tolerance=tolerance)
   2226                 else:
-> 2227                     raise ValueError("cannot handle a non-unique multi-index!")
   2228 
   2229         if not isinstance(target, MultiIndex):

ValueError: cannot handle a non-unique multi-index!

我检查了两个 DataFrame 的索引，它们都不包含重复值。同样，两个 DataFrame 也都没有重复的列标题。

我已经在其他 DataFrame 上使用过这种方法，没有遇到任何问题，所以不太确定这里为什么会出错。

df2 中的所有行索引和列索引在 df1 中都存在，因此我认为问题也不在这里。

使用 combine_first() 合并两个 DataFrame 时出错

0 个答案: