import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
# Columns 0-10 of the CSV go into X and column 11 into y.
dataset = pd.read_csv('trainEdited.csv')
X = dataset.iloc[:, :11].values
y = dataset.iloc[:, 11].values
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
X_L = LabelEncoder()
# LabelEncoder only accepts a 1-D array of labels; passing the whole 2-D
# slice X[:, 0:11] raises ValueError. Encode one column at a time instead.
# The single encoder is refit for every column, so after the loop X_L holds
# the classes of the last column only.
for col in range(X.shape[1]):
    X[:, col] = X_L.fit_transform(X[:, col])
这些是各变量的形状:
X.shape Out[30]: (4990, 11)
y.shape Out[31]: (4990,)
dataset.shape Out[32]: (4990, 12)
谢谢
我尝试对每一列分别使用 LabelEncoder,但出现了 csr 矩阵(csr_matrix)相关的错误,这是什么意思?
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
# Columns 0-10 of the CSV go into X and column 11 into y.
dataset = pd.read_csv('trainEdited.csv')
X = dataset.iloc[:, :11].values
y = dataset.iloc[:, 11].values

# Slice (rather than index) column 7 so it stays 2-D, as the imputer expects.
X_n = X[:, 7:8]

# `sklearn.preprocessing.Imputer` was removed in scikit-learn 0.22; its
# replacement is `sklearn.impute.SimpleImputer`. Prefer the new class and
# fall back to the old one on installs that predate it. The name `Imputer`
# stays bound either way.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:
    from sklearn.preprocessing import Imputer
imputer = Imputer(strategy="median")
imputer.fit(X_n)
X_n = imputer.transform(X_n)
# NOTE(review): transform() returns a new array, so X itself is unchanged and
# column 7 is label-encoded from its raw values below. Confirm whether X_n was
# meant to be written back into X (and column 7 left out of the encoding).

from sklearn.preprocessing import OneHotEncoder, LabelEncoder
X_L = LabelEncoder()
# Label-encode columns 0-8 one at a time (LabelEncoder takes 1-D input only).
# The single encoder is refit for every column, so after the loop X_L holds
# the classes of column 8 only.
for col in range(9):
    X[:, col] = X_L.fit_transform(X[:, col])