import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the data set. Every column except the last one is used as a feature;
# the target is read from the column at position 3.
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
# NOTE(review): assumes Data.csv has exactly four columns, so that column 3
# is the last one and is not also part of X -- confirm against the file.
Y = dataset.iloc[:, 3].values

# Replace missing values in feature columns 1 and 2 with the column mean.
# SimpleImputer supersedes sklearn.preprocessing.Imputer (removed in
# scikit-learn 0.22); it always works column-wise, i.e. the old axis=0.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

# Encode the categorical feature in column 0 as integer codes.
# NOTE(review): if this column (or Y below) mixes strings with NaN, some
# scikit-learn versions fail here with "'<' not supported between instances
# of 'float' and 'str'" -- check the CSV for blank cells if that happens.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])

# Expand column 0 into one-hot dummy variables. The categorical_features
# argument of OneHotEncoder was removed in scikit-learn 0.22, so the column
# selection is done by a ColumnTransformer instead. The dummy columns come
# first, followed by the untouched remaining columns (same layout as before).
# sparse_threshold=0 forces a dense result, replacing the former .toarray().
from sklearn.compose import ColumnTransformer
onehotencoder = ColumnTransformer(
    [('onehot', OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
# The passthrough columns are object-typed; cast so X is a float matrix.
X = onehotencoder.fit_transform(X).astype(float)

# Encode the target with its own encoder. The original code refit
# labelencoder_X here, which discarded the feature encoding it had learned
# and left labelencoder_Y unused.
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)

# Split the data into training (80%) and test (20%) sets; the fixed
# random_state makes the split reproducible. sklearn.cross_validation was
# removed in scikit-learn 0.20; model_selection is its replacement.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
我正在按照这个教程系列学习,教师运行同样的代码时没有出错,但我照做之后,在最后一行报错:TypeError: '<' not supported between instances of 'float' and 'str'('float' 和 'str' 的实例之间不支持 '<' 比较)。请帮忙。
答案 0 :(得分:0)
我的问题已经解决。CSV 文件中的数据被改动过,因此导致了这个错误。