我刚刚在 pandas 中遇到了一个非常奇怪的错误:
# Minimal reproduction: apply a dict of manual corrections to a copy of a
# DataFrame, then print what the corrected frame and a fresh copy of it
# report for the same cell.
from numpy import nan
from pandas import DataFrame

# One column, 'Q18', on a float-valued index. The answers are strings
# (including the literal string 'None') or NaN.
cleanData = DataFrame({'Q18': {751.0: 'None',
                               754.0: '3',
                               757.0: '10',
                               758.0: nan,
                               759.0: nan,
                               763.0: '40',
                               766.0: nan,
                               767.0: 'None',
                               769.0: '1',
                               770.0: nan}})

# column name -> {row label -> corrected value}.
# Labels 751 and 767 are present in cleanData's index; 1097 is not.
manualCorrections = {
    'Q18': {
        751: 0,
        767: 0,
        1097: 10,
    },
}

manualCleanedData = cleanData.copy()
print(manualCleanedData['Q18'].loc[751])

for col_name, corrections in manualCorrections.items():
    for lab, val in corrections.items():
        # NOTE(review): this is chained indexing -- the column Series is
        # selected first and the value is then assigned through that
        # Series' own .loc. The pandas indexing docs recommend one call,
        # `manualCleanedData.loc[lab, col_name] = val`, instead.
        # Left unchanged on purpose: this line is the behaviour being
        # reproduced by this script.
        manualCleanedData[col_name].loc[lab] = val

finalCleanData = manualCleanedData.copy()
print(manualCleanedData['Q18'].loc[751])
print(finalCleanData['Q18'].loc[751])
输出:
None
0
None
其中输出的 None 是字符串 'None',而不是 Python 的 None 对象。
此外,
# Compare the corrected frame with a fresh copy of itself.
manualCleanedData == manualCleanedData.copy()
抛出异常:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-5d010e43aca8> in <module>()
----> 1 manualCleanedData == manualCleanedData.copy()
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/ops.py in f(self, other)
873 def f(self, other):
874 if isinstance(other, pd.DataFrame): # Another DataFrame
--> 875 return self._compare_frame(other, func, str_rep)
876 elif isinstance(other, pd.Series):
877 return self._combine_series_infer(other, func)
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in _compare_frame(self, other, func, str_rep)
2860 raise ValueError('Can only compare identically-labeled '
2861 'DataFrame objects')
-> 2862 return self._compare_frame_evaluate(other, func, str_rep)
2863
2864 def _flex_compare_frame(self, other, func, str_rep, level):
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in _compare_frame_evaluate(self, other, func, str_rep)
2842 def _compare(a, b):
2843 return dict([(col, func(a[col], b[col])) for col in a.columns])
-> 2844 new_data = expressions.evaluate(_compare, str_rep, self, other)
2845 return self._constructor(data=new_data, index=self.index,
2846 columns=self.columns, copy=False)
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/computation/expressions.py in evaluate(op, op_str, a, b, raise_on_error, use_numexpr, **eval_kwargs)
174 if use_numexpr:
175 return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
--> 176 **eval_kwargs)
177 return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)
178
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/computation/expressions.py in _evaluate_numexpr(op, op_str, a, b, raise_on_error, truediv, **eval_kwargs)
114
115 if result is None:
--> 116 result = _evaluate_standard(op, op_str, a, b, raise_on_error)
117
118 return result
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/computation/expressions.py in _evaluate_standard(op, op_str, a, b, raise_on_error, **eval_kwargs)
62 if _TEST_MODE:
63 _store_test_result(False)
---> 64 return op(a, b)
65
66
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in _compare(a, b)
2841 if self.columns.is_unique:
2842 def _compare(a, b):
-> 2843 return dict([(col, func(a[col], b[col])) for col in a.columns])
2844 new_data = expressions.evaluate(_compare, str_rep, self, other)
2845 return self._constructor(data=new_data, index=self.index,
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/frame.py in <listcomp>(.0)
2841 if self.columns.is_unique:
2842 def _compare(a, b):
-> 2843 return dict([(col, func(a[col], b[col])) for col in a.columns])
2844 new_data = expressions.evaluate(_compare, str_rep, self, other)
2845 return self._constructor(data=new_data, index=self.index,
/home/space/phujfj/Installations/Python3/lib/python3.3/site-packages/pandas-0.13.1-py3.3-linux-x86_64.egg/pandas/core/ops.py in wrapper(self, other)
540 name = _maybe_match_name(self, other)
541 if len(self) != len(other):
--> 542 raise ValueError('Series lengths must match to compare')
543 return self._constructor(na_op(self.values, other.values),
544 index=self.index, name=name)
ValueError: Series lengths must match to compare
从 manualCorrections 中删除 `1097: 10` 这一条目后,错误就不再出现。
是我自己犯了糊涂,还是这里确实存在严重的 bug?
pandas 版本:0.13.1
Python版本:3.3.3
操作系统:Linux x86_64