I've seen many similar questions here, but none of the answers solve my problem.
I'm trying to apply a power transform to my dataset, but I keep getting the error below.
The dataset contains no inf or NaN values, and I've verified that nothing is greater than float64.max. I've also tried reindexing the dataframes.
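The reindexing attempt was something like the following (a minimal sketch, assuming ordinary pandas DataFrames; reset_index(drop=True) just rebuilds a clean 0..n-1 RangeIndex, it is not the exact code I ran):

# Rebuild a fresh RangeIndex on every split (illustrative only)
features_training = features_training.reset_index(drop=True)
features_test = features_test.reset_index(drop=True)
target_training = target_training.reset_index(drop=True)
target_test = target_test.reset_index(drop=True)

The code that actually triggers the error is below: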
from sklearn.preprocessing import PowerTransformer
import numpy as np

# Cast everything to float64
features_training = features_training.astype(np.float64)
target_training = target_training.astype(np.float64)
features_test = features_test.astype(np.float64)
target_test = target_test.astype(np.float64)

# Check for values at or above the float64 maximum
print(np.where(features_training.values >= np.finfo(np.float64).max))
print(np.where(features_test.values >= np.finfo(np.float64).max))
print(np.where(target_training.values >= np.finfo(np.float64).max))
print(np.where(target_test.values >= np.finfo(np.float64).max))

# Check for NaN
print(np.isnan(features_training.values).any())
print(np.isnan(features_test.values).any())
print(np.isnan(target_training.values).any())
print(np.isnan(target_test.values).any())

# Check for infinity
print(np.isinf(features_training.values).any())
print(np.isinf(features_test.values).any())
print(np.isinf(target_training.values).any())
print(np.isinf(target_test.values).any())

# Fit the power transformers on the training data and apply them to both splits
pt_X = PowerTransformer().fit(features_training)
pt_Y = PowerTransformer().fit(np.asarray(target_training).reshape(-1, 1))
features_training = pt_X.transform(features_training)
target_training = pt_Y.transform(np.asarray(target_training).reshape(-1, 1))
features_test = pt_X.transform(features_test)
target_test = pt_Y.transform(np.asarray(target_test).reshape(-1, 1))
Output of dataframe.info() for each split:
features training
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Columns: 138 entries
dtypes: float64(138)
memory usage: 545.6 KB
None
target training
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 1 columns):
506 non-null float64
dtypes: float64(1)
memory usage: 4.0 KB
None
features test
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Columns: 138 entries
dtypes: float64(138)
memory usage: 519.7 KB
None
target test
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 1 columns):
482 non-null float64
dtypes: float64(1)
memory usage: 3.8 KB
None
Error traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-100-6ca93dd1855a> in <module>
21 # features already normalized. Target remains the same
22 features_training, features_test, target_training, target_test, ptX_, pt_Y = normalization(features_training, features_test,
---> 23 target_training, target_test)
24
25 model.fit(features_training, target_training)
<ipython-input-99-9199a48b9d30> in normalization(features_training, features_test, target_training, target_test)
47 target_training = pt_Y.transform(np.asarray(target_training).reshape(-1,1))
48
---> 49 features_test = pt_X.transform(features_test)
50 target_test = pt_Y.transform(np.asarray(target_test).reshape(-1,1))
51
~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\preprocessing\data.py in transform(self, X)
2731
2732 if self.standardize:
-> 2733 X = self._scaler.transform(X)
2734
2735 return X
~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\preprocessing\data.py in transform(self, X, copy)
756 X = check_array(X, accept_sparse='csr', copy=copy,
757 estimator=self, dtype=FLOAT_DTYPES,
--> 758 force_all_finite='allow-nan')
759
760 if sparse.issparse(X):
~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
540 if force_all_finite:
541 _assert_all_finite(array,
--> 542 allow_nan=force_all_finite == 'allow-nan')
543
544 if ensure_min_samples > 0:
~\AppData\Local\Continuum\anaconda2\envs\env36\lib\site-packages\sklearn\utils\validation.py in _assert_all_finite(X, allow_nan)
54 not allow_nan and not np.isfinite(X).all()):
55 type_err = 'infinity' if allow_nan else 'NaN, infinity'
---> 56 raise ValueError(msg_err.format(type_err, X.dtype))
57 # for object dtype data, we only check for NaNs (GH-13254)
58 elif X.dtype == np.dtype('object') and not allow_nan:
ValueError: Input contains infinity or a value too large for dtype('float64').
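From the traceback, the ValueError is raised inside the StandardScaler that PowerTransformer runs when standardize=True (the self._scaler.transform call), not while checking my input directly. As a diagnostic, here is a minimal sketch (not part of my original pipeline; standardize and lambdas_ are scikit-learn's documented PowerTransformer parameter/attribute, while pt_raw and raw_test are just illustrative names) that applies only the Yeo-Johnson step and checks whether it already produces non-finite or extremely large values on the test features:

from sklearn.preprocessing import PowerTransformer
import numpy as np

# Fit only the Yeo-Johnson step, without the internal StandardScaler
pt_raw = PowerTransformer(method='yeo-johnson', standardize=False).fit(features_training)

# Estimated lambda per column; extreme lambdas can blow up the transformed values
print(pt_raw.lambdas_)

# Transform the test split and inspect the intermediate values
raw_test = pt_raw.transform(features_test)
print(np.isfinite(raw_test).all())
print(np.abs(raw_test).max())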