以下代码使用网格搜索(GridSearchCV)来调整装袋(Bagging)算法的参数。尽管我为决策树和装袋集成模型都设置了种子和 random_state,但每次运行代码得到的 best_params_ 仍然不同。有什么建议吗?
# Bagged Decision Trees for Classification
import pandas
from sklearn import model_selection
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV
from random import seed
# Single integer seed reused for every random_state in this script.
# NOTE: this assignment rebinds the name `seed` imported from `random`, so
# random.seed() is never actually called. Reproducibility therefore has to
# come from passing random_state explicitly to every estimator and splitter.
seed = 1
#X=datascaled.iloc[:,0:71]
#Selected_features=['Event','AVK','Beta blockers','proton pump inhibitor','Previous stroke','CYP2C19*17','Clopidogrel active metabolite','Obesity']
Selected_features = ['Event time', 'CYP2C19*17', 'Clopidogrel active metabolite',
                     'proton pump inhibitor', 'DOSE BB', 'Previous stroke',
                     'Obesity', 'AVK']
X = datascaled[Selected_features]
Y = datascaled['Cardio1']

# Hold out 30% of the rows for the final accuracy check; the split is seeded.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, Y, test_size=0.3, random_state=seed)

# Hyper-parameter grid for the bagging ensemble.
# In BaggingClassifier, float values of max_samples/max_features are fractions
# and int values are absolute counts, so 1 and 2 below mean "1 or 2 features".
# The asker's own fix was to add 'random_state': [123] (and 'n_jobs': [-1]) to
# this grid; fixing random_state on the estimator below achieves the same
# reproducibility without adding a dimension to the grid.
param_grid = {
    'base_estimator__max_depth': [1, 2, 3, 4, 5],
    'max_samples': [0.05, 0.1, 0.2, 0.5],
    'max_features': [0.5, 1, 2],
    'n_estimators': [10, 20, 50, 100, 150, 200],
}

# Both the ensemble (bootstrap / feature sampling) and the trees draw random
# numbers; without a fixed random_state each run scores the candidates
# differently and best_params_ changes from run to run.
# model_selection.GridSearchCV replaces the deprecated sklearn.grid_search one.
clf = model_selection.GridSearchCV(
    BaggingClassifier(DecisionTreeClassifier(random_state=seed),
                      n_estimators=50, max_features=0.5, random_state=seed),
    param_grid, cv=10, scoring='accuracy')
clf.fit(X_train, y_train)

# The best hyper-parameter set found by the search
print("Best Hyper Parameters:\n",clf.best_params_)
prediction = clf.predict(X_test)

# importing the metrics module
from sklearn import metrics
# Evaluation (accuracy) on the held-out test set; arguments are (y_true, y_pred)
print("Accuracy:",metrics.accuracy_score(y_test, prediction))
# evaluation (confusion matrix)
from sklearn.cross_validation import train_test_split,StratifiedShuffleSplit,cross_val_score
from sklearn import cross_validation
from sklearn.model_selection import StratifiedKFold
from time import *
from sklearn import metrics
# Cross-validated evaluation of one fixed bagging configuration on the full
# data set: out-of-fold predictions, timing, confusion matrix, accuracy,
# ROC AUC and a classification report.
n_folds = 10
DTC = DecisionTreeClassifier(max_features=2, class_weight="balanced",
                             max_depth=4, random_state=seed)
# random_state on the ensemble fixes the bootstrap sampling; without it the
# metrics below differ from run to run.
model = BaggingClassifier(base_estimator=DTC, max_samples=0.5,
                          n_estimators=150, random_state=seed)
# model_selection.StratifiedKFold replaces the deprecated
# cross_validation.StratifiedKFold(Y, n_folds=...). The original passed
# random_state=42 without shuffle=True, which has no effect on the folds, so
# it is dropped here and the folds stay unshuffled as before.
cv = model_selection.StratifiedKFold(n_splits=n_folds)
t0 = time()
# Out-of-fold predictions for every row, using all available cores.
y_pred = model_selection.cross_val_predict(model, X=X, y=Y, n_jobs=-1, cv=cv)
t = time() - t0
print("=" * 52)
print("time cost: {}".format(t))
print()
print("confusion matrix\n", metrics.confusion_matrix(Y, y_pred))
print()
print("\t\taccuracy: {}".format(metrics.accuracy_score(Y, y_pred)))
# NOTE(review): y_pred holds hard class labels, not scores; an AUC computed
# from labels is coarser than one from predict_proba output - confirm intent.
print("\t\troc_auc_score: {}".format(metrics.roc_auc_score(Y, y_pred)))
print(metrics.classification_report(Y, y_pred))
答案 0 :(得分:0)
当以下两种情况同时存在时,就会出现这种现象:1. 数据量/迭代次数不足,模型无法收敛;2. 模型严重欠拟合,因此每次运行得到的结果都不相同。
答案 1 :(得分:0)
我通过在参数网格中添加 'n_jobs': [-1] 和 'random_state': [123] 解决了该问题。我已对上面的代码做了更正,并在代码的注释中标明了这一点。