Pandas数据帧副本与原始数据不同

时间:2014-02-12 10:44:19

标签: python python-3.x pandas python-3.3

我刚刚在 pandas 中遇到了一个非常奇怪的错误:

from pandas import *
# Minimal reproduction data: a single-column frame ('Q18') holding strings
# and NaN, keyed by float row labels. Note that 'None' here is the literal
# string, not the Python None object.
cleanData = DataFrame({'Q18': {751.0: 'None',
  754.0: '3',
  757.0: '10',
  758.0: nan,
  759.0: nan,
  763.0: '40',
  766.0: nan,
  767.0: 'None',
  769.0: '1',
  770.0: nan}})

# Manual corrections, laid out as {column name: {row label: new value}}.
# Labels 751 and 767 exist in cleanData's index; 1097 does not.
manualCorrections = \
{
 'Q18':
 {
  751: 0,
  767: 0,
  1097: 10
 }
}

# Work on a copy so that cleanData itself is not the assignment target.
manualCleanedData = cleanData.copy()

# Value before any correction is applied.
print(manualCleanedData['Q18'].loc[751])

# Apply every correction with chained indexing: first select the column,
# then assign through .loc on the selected Series.
# NOTE(review): this chained form is presumably what triggers the reported
# behaviour -- assigning to the absent label 1097 likely enlarges only the
# intermediate Series rather than the frame. TODO confirm against pandas
# 0.13.1. A single-step `manualCleanedData.loc[lab, col_name] = val` would
# avoid the chained assignment.
for col_name, corrections in manualCorrections.items():
    for lab, val in corrections.items():
        manualCleanedData[col_name].loc[lab] = val

# Second copy, taken after the corrections were applied.
finalCleanData = manualCleanedData.copy()

# Compare the same cell in the corrected frame and in its copy.
print(manualCleanedData['Q18'].loc[751])
print(finalCleanData['Q18'].loc[751])

输出:

None
0
None

其中的 None 是字符串 'None',而不是 Python 的 None 对象。

此外,

manualCleanedData == manualCleanedData.copy()

抛出异常:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-5d010e43aca8> in <module>()
----> 1 manualCleanedData == manualCleanedData.copy()

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/ops.py in f(self, other)
    873     def f(self, other):
    874         if isinstance(other, pd.DataFrame):    # Another DataFrame
--> 875             return self._compare_frame(other, func, str_rep)
    876         elif isinstance(other, pd.Series):
    877             return self._combine_series_infer(other, func)

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in _compare_frame(self, other, func, str_rep)
   2860             raise ValueError('Can only compare identically-labeled '
   2861                              'DataFrame objects')
-> 2862         return self._compare_frame_evaluate(other, func, str_rep)
   2863 
   2864     def _flex_compare_frame(self, other, func, str_rep, level):

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in _compare_frame_evaluate(self, other, func, str_rep)
   2842             def _compare(a, b):
   2843                 return dict([(col, func(a[col], b[col])) for col in a.columns])
-> 2844             new_data = expressions.evaluate(_compare, str_rep, self, other)
   2845             return self._constructor(data=new_data, index=self.index,
   2846                                      columns=self.columns, copy=False)

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/computation/expressions.py in evaluate(op, op_str, a, b, raise_on_error, use_numexpr, **eval_kwargs)
    174     if use_numexpr:
    175         return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
--> 176                          **eval_kwargs)
    177     return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)
    178 

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/computation/expressions.py in _evaluate_numexpr(op, op_str, a, b, raise_on_error, truediv, **eval_kwargs)
    114 
    115     if result is None:
--> 116         result = _evaluate_standard(op, op_str, a, b, raise_on_error)
    117 
    118     return result

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/computation/expressions.py in _evaluate_standard(op, op_str, a, b, raise_on_error, **eval_kwargs)
     62     if _TEST_MODE:
     63         _store_test_result(False)
---> 64     return op(a, b)
     65 
     66 

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in _compare(a, b)
   2841         if self.columns.is_unique:
   2842             def _compare(a, b):
-> 2843                 return dict([(col, func(a[col], b[col])) for col in a.columns])
   2844             new_data = expressions.evaluate(_compare, str_rep, self, other)
   2845             return self._constructor(data=new_data, index=self.index,

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in <listcomp>(.0)
   2841         if self.columns.is_unique:
   2842             def _compare(a, b):
-> 2843                 return dict([(col, func(a[col], b[col])) for col in a.columns])
   2844             new_data = expressions.evaluate(_compare, str_rep, self, other)
   2845             return self._constructor(data=new_data, index=self.index,

/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/ops.py in wrapper(self, other)
    540             name = _maybe_match_name(self, other)
    541             if len(self) != len(other):
--> 542                 raise ValueError('Series lengths must match to compare')
    543             return self._constructor(na_op(self.values, other.values),
    544                                      index=self.index, name=name)

ValueError: Series lengths must match to compare

从manualCorrections中删除1097:10条目可防止出现错误。

是我自己犯了低级错误,还是这里确实有什么东西严重出了问题?

pandas 版本:0.13.1

Python版本:3.3.3

操作系统:Linux x86_64

0 个答案:

没有答案