我保存了用于预测的模型,所有的模型预测都在下面的第一个代码块中进行
# Split the data into train/test partitions (25% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    input_x, input_y, test_size=0.25, random_state=0
)

# Load the saved model. The context manager guarantees the file handle is
# closed even if unpickling fails.
# NOTE(review): pickle can execute arbitrary code while loading; only load
# model files that come from a trusted source.
with open('rf_skl_model.sav', 'rb') as model_file:
    rf_model = pickle.load(model_file)

# Predict class probabilities for the full X_test from train_test_split.
y_pred_rf_xtest = rf_model.predict_proba(X_test)

# Predict one record from the split. The single row is reshaped to
# (1, n_features) before being passed to predict_proba.
y_pred_rf_xtest1 = rf_model.predict_proba(X_test[180].reshape(1, -1))

# Also predict one record read from a csv file.
test = pd.read_csv('Test.csv', sep=',', decimal='.', header=0)
# NOTE(review): assumes column 0 is the label and columns 1..57 are the
# features, in the same order the model was trained on -- confirm.
test_x = test.iloc[:, 1:58]
test_y = test.iloc[:, 0]
# reshape(1, -1) flattens everything into ONE row, so this presumably expects
# Test.csv to contain exactly one record -- confirm.
test2d = test_x.values.reshape(1, -1)
y_pred_rf = rf_model.predict_proba(test2d)
接下来使用 LIME 对模型进行解释
import lime
import lime.lime_tabular
# Feature names are all columns of train_data except the target column.
target = 'GoodvsBad'
feature_names = train_data.columns.drop(target)

# Build the LIME tabular explainer on the training split.
# training_labels should be row-aligned with the training data passed as the
# first argument, so use y_train (the labels of X_train) rather than the label
# column of the full, unsplit train_data.
# NOTE(review): assumes input_y is the 'GoodvsBad' column of train_data -- confirm.
rf_explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train,
    mode='classification',
    training_labels=y_train,
    feature_names=feature_names,
)
# The explain_instance calls later in this file refer to the explainer as
# rf_model_explainer; bind that name too so they do not raise NameError.
rf_model_explainer = rf_explainer
到这里,我有两个问题。第一个问题:我想获取拆分数据中单条测试记录的解释。使用下面的方法不会报错,但结果只显示特征名称,所有变量的值都为零,绘出的图也是空的,我不知道哪里出了问题
# Explain a single held-out row. The explainer is bound to the name
# rf_explainer where it is constructed, so use that name here.
exp = rf_explainer.explain_instance(
    X_test[180], rf_model.predict_proba, num_features=10, top_labels=1
)
# With top_labels=1 an explanation exists only for the top predicted class.
# as_list() and as_pyplot_figure() default to label=1, which is not guaranteed
# to be that class, so ask the explanation which label it actually holds.
top_label = exp.available_labels()[0]
exp.as_list(label=top_label)
fig = exp.as_pyplot_figure(label=top_label)
第二个问题:如果我使用从 csv 读取的测试数据(即上述代码中的 test2d),则会收到错误:“ValueError:无法将形状 (57) 的输入数组广播到形状 (1)”
# explain_instance expects ONE 1-D row, while predict_proba takes a 2-D batch.
# test2d has shape (1, n_features), so pass its single row test2d[0]; handing
# over the 2-D array is what triggers the "could not broadcast" ValueError.
# The explainer is bound to the name rf_explainer where it is constructed.
exp2 = rf_explainer.explain_instance(
    test2d[0], rf_model.predict_proba, num_features=10, top_labels=1
)
# top_labels=1 keeps only the top predicted class, so request that label
# explicitly instead of relying on the default label=1.
exp2.as_list(label=exp2.available_labels()[0])
除了用于解释图中变量的数据以及如何使用csv中的测试数据之外,我