输入包含无穷大或对 dtype('float64') 而言过大的值

时间:2019-06-16 18:25:20

标签: pandas scikit-learn sklearn-pandas

我在这里看到了许多类似的问题,但是没有一个答案能解决我的问题。

我正在尝试在数据集中进行Power Transform,但仍然会遇到此类错误。

数据集不包含inf或nan值,并且我确保它们不大于float64.max。我还尝试过重新索引数据框。

# Cast all four splits to float64 up front.
features_training, target_training, features_test, target_test = (
    split.astype(np.float64)
    for split in (features_training, target_training, features_test, target_test)
)

# Sanity checks are printed in this order: training features, test features,
# training target, test target.
checked_splits = (features_training, features_test, target_training, target_test)

# Positions of values at or above the largest finite float64.
for split in checked_splits:
    print(np.where(split.values >= np.finfo(np.float64).max))

# Whether any value is NaN.
for split in checked_splits:
    print(np.isnan(split.values).any())

# Whether any value is infinite.
for split in checked_splits:
    print(np.isinf(split.values).any())

# Fit one transformer on the training features and one on the training target
# (reshaped to a single column); the fitted transformers are then applied to
# both the training and the test split.
training_target_column = np.asarray(target_training).reshape(-1, 1)
pt_X = PowerTransformer().fit(features_training)
pt_Y = PowerTransformer().fit(training_target_column)

features_training = pt_X.transform(features_training)
target_training = pt_Y.transform(training_target_column)

test_target_column = np.asarray(target_test).reshape(-1, 1)
features_test = pt_X.transform(features_test)
target_test = pt_Y.transform(test_target_column)

以下是各数据集的 dataframe.info() 输出:

features training
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Columns: 138 entries
dtypes: float64(138)
memory usage: 545.6 KB
None

target training
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 1 columns):
506 non-null float64
dtypes: float64(1)
memory usage: 4.0 KB
None

features test
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Columns: 138 entries
dtypes: float64(138)
memory usage: 519.7 KB
None

target test
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 1 columns):
482 non-null float64
dtypes: float64(1)
memory usage: 3.8 KB
None

错误回溯

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-100-6ca93dd1855a> in <module>
     21     # features already normalized. Target remains the same
     22     features_training, features_test, target_training, target_test, ptX_, pt_Y = normalization(features_training, features_test,
---> 23                                                                                            target_training, target_test)
     24 
     25     model.fit(features_training, target_training)

<ipython-input-99-9199a48b9d30> in normalization(features_training, features_test, target_training, target_test)
     47     target_training = pt_Y.transform(np.asarray(target_training).reshape(-1,1))
     48 
---> 49     features_test = pt_X.transform(features_test)
     50     target_test = pt_Y.transform(np.asarray(target_test).reshape(-1,1))
     51 

~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\preprocessing\data.py in transform(self, X)
   2731 
   2732         if self.standardize:
-> 2733             X = self._scaler.transform(X)
   2734 
   2735         return X

~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\preprocessing\data.py in transform(self, X, copy)
    756         X = check_array(X, accept_sparse='csr', copy=copy,
    757                         estimator=self, dtype=FLOAT_DTYPES,
--> 758                         force_all_finite='allow-nan')
    759 
    760         if sparse.issparse(X):

~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    540         if force_all_finite:
    541             _assert_all_finite(array,
--> 542                                allow_nan=force_all_finite == 'allow-nan')
    543 
    544     if ensure_min_samples > 0:

~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\utils\validation.py in _assert_all_finite(X, allow_nan)
     54                 not allow_nan and not np.isfinite(X).all()):
     55             type_err = 'infinity' if allow_nan else 'NaN, infinity'
---> 56             raise ValueError(msg_err.format(type_err, X.dtype))
     57     # for object dtype data, we only check for NaNs (GH-13254)
     58     elif X.dtype == np.dtype('object') and not allow_nan:

ValueError: Input contains infinity or a value too large for dtype('float64').

0 个答案:

没有答案