我有以下数据框
df1 = df[['tripduration','starttime','stoptime','start station name','end station name','bikeid','usertype','birth year','gender']]
print(df1.head(2))
打印以下内容
tripduration starttime stoptime start station name \
0 364 2017-09-01 00:02:01 2017-09-01 00:08:05 Exchange Place
1 357 2017-09-01 00:08:12 2017-09-01 00:14:09 Warren St
end station name bikeid usertype birth year gender
0 Marin Light Rail 29670 Subscriber 1989.0 1
1 Newport Pkwy 26163 Subscriber 1980.0 1
我使用以下代码转换"出生年份"列类型从float到int。
df1[['birth year']] = df1[['birth year']].astype(int)
print df1.head(2)
但是我收到以下错误。如何解决这个问题?
ValueErrorTraceback (most recent call last)
<ipython-input-25-0fe766e4d4a7> in <module>()
----> 1 df1[['birth year']] = df1[['birth year']].astype(int)
2 print df1.head(2)
3 __zeppelin__._displayhook()
/usr/miniconda2/lib/python2.7/site-packages/pandas/util/_decorators.pyc in wrapper(*args, **kwargs)
116 else:
117 kwargs[new_arg_name] = new_arg_value
--> 118 return func(*args, **kwargs)
119 return wrapper
120 return _deprecate_kwarg
/usr/miniconda2/lib/python2.7/site-packages/pandas/core/generic.pyc in astype(self, dtype, copy, errors, **kwargs)
4002 # else, only a single dtype is given
4003 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 4004 **kwargs)
4005 return self._constructor(new_data).__finalize__(self)
4006
/usr/miniconda2/lib/python2.7/site-packages/pandas/core/internals.pyc in astype(self, dtype, **kwargs)
3460
3461 def astype(self, dtype, **kwargs):
-> 3462 return self.apply('astype', dtype=dtype, **kwargs)
3463
3464 def convert(self, **kwargs):
/usr/miniconda2/lib/python2.7/site-packages/pandas/core/internals.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3327
3328 kwargs['mgr'] = self
-> 3329 applied = getattr(b, f)(**kwargs)
3330 result_blocks = _extend_blocks(applied, result_blocks)
3331
/usr/miniconda2/lib/python2.7/site-packages/pandas/core/internals.pyc in astype(self, dtype, copy, errors, values, **kwargs)
542 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
543 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 544 **kwargs)
545
546 def _astype(self, dtype, copy=False, errors='raise', values=None,
/usr/miniconda2/lib/python2.7/site-packages/pandas/core/internals.pyc in _astype(self, dtype, copy, errors, values, klass, mgr, **kwargs)
623
624 # _astype_nansafe works fine with 1-d only
--> 625 values = astype_nansafe(values.ravel(), dtype, copy=True)
626 values = values.reshape(self.shape)
627
/usr/miniconda2/lib/python2.7/site-packages/pandas/core/dtypes/cast.pyc in astype_nansafe(arr, dtype, copy)
685
686 if not np.isfinite(arr).all():
--> 687 raise ValueError('Cannot convert non-finite values (NA or inf) to '
688 'integer')
689
ValueError: Cannot convert non-finite values (NA or inf) to integer
答案 0 :(得分:4)
如果你的DF很大,你很可能看不到丢失的数字。但您可以使用fillna
功能来帮助
>>> df = pd.DataFrame(data=data, columns=['id', 'birth_year'])
>>> df
id birth_year
0 1 1989.0
1 2 1990.0
2 3 NaN
>>> df.birth_year
0 1989.0
1 1990.0
2 NaN
Name: birth_year, dtype: float64
>>> df.birth_year.astype(int)
ERROR |2018.01.29T18:14:04|default:183: Unhandled Terminal Exception
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/devtools/uat/anaconda4321/lib/python3.6/site-
packages/pandas/util/_decorators.py", line 91, in wrapper
return func(*args, **kwargs)
File "/usr/local/devtools/uat/anaconda4321/lib/python3.6/site-
packages/pandas/core/generic.py", line 3410, in astype
**kwargs)
File "/usr/local/devtools/uat/anaconda4321/lib/python3.6/site-
packages/pandas/core/internals.py", line 3224, in astype
return self.apply('astype', dtype=dtype, **kwargs)
File "/usr/local/devtools/uat/anaconda4321/lib/python3.6/site-
packages/pandas/core/internals.py", line 3091, in apply
applied = getattr(b, f)(**kwargs)
File "/usr/local/devtools/uat/anaconda4321/lib/python3.6/site-
packages/pandas/core/internals.py", line 471, in astype
**kwargs)
File "/usr/local/devtools/uat/anaconda4321/lib/python3.6/site-
packages/pandas/core/internals.py", line 521, in _astype
values = astype_nansafe(values.ravel(), dtype, copy=True)
File "/usr/local/devtools/uat/anaconda4321/lib/python3.6/site-
packages/pandas/core/dtypes/cast.py", line 620, in astype_nansafe
raise ValueError('Cannot convert non-finite values (NA or inf) to '
ValueError: Cannot convert non-finite values (NA or inf) to integer
>>> df = df.fillna(0)
>>> df.birth_year.astype(int)
0 1989
1 1990
2 0
Name: birth_year, dtype: int64