我正在使用 pandas 和 sklearn 创建一棵决策树来学习数据,其中树的剪枝方法是尝试不同的最大深度。我相信其余部分都能正常工作,但我似乎无法通过 pyplot 把结果绘制出来。有人可以帮助我吗?
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn import cross_validation
from sklearn.cross_validation import KFold
import matplotlib.pyplot as plt
# Train decision trees of increasing depth on the mammographic-masses data,
# pick the depth with the best cross-validated accuracy, and report how that
# tree performs on a held-out test split.
#
# NOTE(review): `sklearn.cross_validation` is the legacy module name; newer
# scikit-learn releases expose the same helpers from
# `sklearn.model_selection` -- confirm against the installed version.

# Column names for the header-less CSV; the last one is the target label.
features = ['birad', 'age', 'Shape', 'margin', 'density', 'severity']
df = pd.read_csv('mammographic_masses.data', header=None, names=features)

# Drop every row that has a '?' placeholder in a predictor column
# (presumably '?' marks a missing value -- confirm against the data file).
# The target column 'severity' is deliberately not filtered, matching the
# original script where that filter was disabled.
for column in features[:-1]:
    df = df[df[column] != '?']

x = df[features[:-1]]
y = df['severity']

# Hold out 40% of the rows for the final evaluation; fixed seed so the split
# is the same on every run.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x, y, test_size=0.4, random_state=0)

depth = []          # (max_depth, test accuracy) pairs, one per tried depth
best_depth = 3      # fallback depth if no candidate beats a CV score of 0
best_score = 0      # test accuracy of the currently selected tree
best_cv_score = 0   # mean 10-fold CV accuracy of the currently selected tree
best_clf = []       # every tree that became the new best; the last one wins

for i in range(1, 20):
    clf = tree.DecisionTreeClassifier(max_depth=i)
    clf = clf.fit(x_train, y_train)

    # Cross-validate on the training split only, so that the choice of
    # max_depth never looks at the held-out test data.
    scores = cross_validation.cross_val_score(clf, x_train, y_train, cv=10)
    cv_score = scores.mean()

    # The test accuracy is recorded for reporting/plotting, not for selection.
    ascore = clf.score(x_test, y_test)
    depth.append((i, ascore))

    if cv_score > best_cv_score:
        best_cv_score = cv_score
        best_score, best_depth = ascore, i
        best_clf.append(clf)

# Single-argument form prints the same text on Python 2 and Python 3.
print('%s   %s' % (best_depth, best_score))
答案 0 :(得分:0)
Just a guess, since you didn't show where you were trying to plot data: did you call
plt.show()
after generating your plots? They won't actually be displayed until you do that.