我正在尝试将已转换为0101的信息转换为原始文本并计算准确率。
我的代码能够做到这一点吗？题目要求是：“使用Python和决策树套件编写用于决策树模型训练和模型性能评估的程序。”
我想知道该怎么做。以下是我的代码：
import csv

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # legacy scikit-learn releases only ship the old module
    from sklearn.cross_validation import train_test_split
# Train decision-tree classifiers on a label-encoded CSV, export the entropy
# tree in Graphviz format, and report accuracy on a held-out test split.
DATA_PATH = r'C:\Users\USER\Desktop\all.csv'

# Open the file once and close it deterministically. The header row is read
# with the csv module, then the handle is rewound so pandas parses the same
# file with the same (platform default) text encoding.
with open(DATA_PATH, newline='') as myData:
    reader = csv.reader(myData)
    headers = next(reader)
    myData.seek(0)
    balance_data = pd.read_csv(myData)
print(headers)

# Encode every column to integer codes, keeping one fitted encoder per
# column. A single shared encoder would be refit on each column and could
# no longer map the target codes back to their original text.
encoders = {}
for column in balance_data.columns:
    encoder = preprocessing.LabelEncoder()
    balance_data[column] = encoder.fit_transform(balance_data[column])
    encoders[column] = encoder
# `le` is the encoder of the target (first) column; it is used below to
# decode predictions.
le = encoders[balance_data.columns[0]]
print("tt,", balance_data)

# Column 0 is the target; columns 1..6 are the features.
Y = balance_data.values[:, 0]
X = balance_data.values[:, 1:7]
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=100)

# Gini-based tree (fitted for comparison; not used for the predictions below).
clf_gini = DecisionTreeClassifier(
    criterion="gini", max_depth=5, min_samples_leaf=5)
clf_gini.fit(X_train, y_train)

# Entropy-based tree; fixed random_state makes the fit reproducible.
clf_entropy = DecisionTreeClassifier(
    criterion="entropy", random_state=100, max_depth=3, min_samples_leaf=5)
clf_entropy.fit(X_train, y_train)

# Dump the entropy tree as Graphviz "dot" text.
with open('ppp', 'w') as f:
    tree.export_graphviz(clf_entropy, out_file=f)

# Predict once, then decode the integer codes back to the original labels.
y_pred_en = clf_entropy.predict(X_test)
y_pred_labels = le.inverse_transform(y_pred_en)
print("Predicted labels:", y_pred_labels)

# Accuracy is identical whether computed on codes or decoded labels, because
# the encoding is a one-to-one mapping.
accuracy = accuracy_score(y_test, y_pred_en)
print("Accuracy is ", accuracy)
print("asd", accuracy)