import numpy as np
import pandas as pd
#Plotting
import matplotlib.pyplot as plt
#Machine Learning Libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
#Loading Dataset
My_dataset = pd.read_csv('mushrooms.csv');
print (My_dataset.head())
print (My_dataset.shape)
#Dividing the datasets into Indicator and Predictor Variables
My_data = My_dataset.iloc[:,1:23].values
My_target = My_dataset.iloc[:,0].values
print()
print(My_data)
print()
print(My_target)
mushroom_train,mushroom_test,mushroomtarget_train,mushroomtarget_test = \
train_test_split(My_data,My_target, test_size = 0.3)
DT_Model_Mushroom = tree.DecisionTreeClassifier()
DT_Model_Mushroom_Fitted = DT_Model_Mushroom.fit(mushroom_train, mushroomtarget_train)
错误:
Error
return array(a, dtype, copy=False, order=order)
ValueError: could not convert string to float: 'f'
答案 0 :(得分:0)
scikit学习中的决策树分类器不将字符串作为输入。
如果数据中包含分类变量,则应事先对其进行编码(使用sklearn编码器之一,例如One hot encoder,Ordinal Encoder,...)
如果您的数据中没有分类变量,则熊猫可能无法正确将类型归因于您的列。如果发生这种情况,则应使用read_csv函数的“ dtype”参数。