代码->
# Data Preprocessing
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, :3].values
# Taking care of missing data
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer([('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X), dtype=np.str)
from sklearn.preprocessing import LabelEncoder
Y = LabelEncoder().fit_transform(Y)
错误->
X = np.array(ct.fit_transform(X), dtype=np.float)
C:\Users\DELL PC\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py:415: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
If you want the future behaviour and silence this warning, you can specify "categories='auto'".
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.
warnings.warn(msg, FutureWarning)
from sklearn.preprocessing import LabelEncoder
Y = LabelEncoder().fit_transform(Y)
Traceback (most recent call last):
File "<ipython-input-21-35a352cd76da>", line 2, in <module>
Y = LabelEncoder().fit_transform(Y)
File "C:\Users\DELL PC\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py", line 235, in fit_transform
y = column_or_1d(y, warn=True)
File "C:\Users\DELL PC\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 760, in column_or_1d
raise ValueError("bad input shape {0}".format(shape))
ValueError: bad input shape (10, 3)
Data.csv Data file
答案 0 :(得分:1)
Y = dataset.iloc[:, :3].values
实际上为您提供了0:2的所有列。我假设您真正想要的是:
Y = dataset.iloc[:, 2].values