我正在尝试使用 .combine_first() 合并两个 pandas DataFrame,但遇到了一个错误。
df3 = df1.combine_first(df2)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-20-187e7aafc9a6> in <module>
----> 1 df3 = df1.combine_first(df2)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in combine_first(self, other)
5362 return expressions.where(mask, y_values, x_values)
5363
-> 5364 return self.combine(other, combiner, overwrite=False)
5365
5366 @deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors',
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in combine(self, other, func, fill_value, overwrite)
5286 # convert_objects just in case
5287 return self._constructor(result, index=new_index,
-> 5288 columns=new_columns)
5289
5290 def combine_first(self, other):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
390 dtype=dtype, copy=copy)
391 elif isinstance(data, dict):
--> 392 mgr = init_dict(data, index, columns, dtype=dtype)
393 elif isinstance(data, ma.MaskedArray):
394 import numpy.ma.mrecords as mrecords
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_dict(data, index, columns, dtype)
175 if columns is not None:
176 from pandas.core.series import Series
--> 177 arrays = Series(data, index=columns, dtype=object)
178 data_names = arrays.index
179
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
205 data = data._data
206 elif isinstance(data, dict):
--> 207 data, index = self._init_dict(data, index, dtype)
208 dtype = None
209 copy = False
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in _init_dict(self, data, index, dtype)
306 # Now we just make sure the order is respected, if any
307 if data and index is not None:
--> 308 s = s.reindex(index, copy=False)
309 elif not PY36 and not isinstance(data, OrderedDict) and data:
310 # Need the `and data` to avoid sorting Series(None, index=[...])
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in reindex(self, index, **kwargs)
3732 @Appender(generic.NDFrame.reindex.__doc__)
3733 def reindex(self, index=None, **kwargs):
-> 3734 return super(Series, self).reindex(index=index, **kwargs)
3735
3736 def drop(self, labels=None, axis=0, index=None, columns=None,
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in reindex(self, *args, **kwargs)
4354 # perform the reindex on the axes
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4357
4358 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4367 ax = self._get_axis(a)
4368 new_index, indexer = ax.reindex(labels, level=level, limit=limit,
-> 4369 tolerance=tolerance, method=method)
4370
4371 axis = self._get_axis_number(a)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\multi.py in reindex(self, target, method, level, limit, tolerance)
2225 tolerance=tolerance)
2226 else:
-> 2227 raise ValueError("cannot handle a non-unique multi-index!")
2228
2229 if not isinstance(target, MultiIndex):
ValueError: cannot handle a non-unique multi-index!
我检查了两个 DataFrame 的索引,二者都不含重复值。同样,两个 DataFrame 也都没有重复的列标题。
我已经在其他 DataFrame 上使用过这种方法,并且没有遇到任何问题,所以我不太确定为什么这里会出错。
df2 中不存在 df1 所没有的行索引或列索引,因此我认为这也不是问题所在。