我有一个熊猫DataFrameX。 我需要找到特定模型的预测解释。我的模型如下:
pipeline = Pipeline(steps= [
('imputer', get_imputer(
categorical_features=categorical_features,
real_features=real_features,
int_features=int_features,
)),
('classifier', RandomForestClassifier(criterion='gini', class_weight='balanced')),
])
print(int_features)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)
y_pred = pipeline.fit(x_train, y_train).predict(x_test)
# f1_score(y, y_pred)
现在作为预测专家,我使用Shap的Kernal Explainer。如下:
# use Kernel SHAP to explain test set predictions
shap.initjs()
explainer = shap.KernelExplainer(pipeline.predict_proba, x_train, link="logit")
shap_values = explainer.shap_values(x_test, nsamples=10)
# # plot the SHAP values for the Setosa output of the first instance
shap.force_plot(explainer.expected_value[0], shap_values[0][0,:], x_test.iloc[0,:], link="logit")
运行代码时出现错误消息
ValueError:仅熊猫数据框支持使用字符串指定列。
Provided model function fails when applied to the provided data set.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-396-50cda7e0af8e> in <module>
2 shap.initjs()
3 # x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)
----> 4 explainer = shap.KernelExplainer(pipeline.predict_proba, x_train, link="logit")
5 shap_values = explainer.shap_values(x_test, nsamples=10)
~/anaconda3/lib/python3.6/site-packages/shap/explainers/kernel.py in __init__(self, model, data, link, **kwargs)
95 self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
96 self.data = convert_to_data(data, keep_index=self.keep_index)
---> 97 model_null = match_model_to_data(self.model, self.data)
98
99 # enforce our current input type limitations
~/anaconda3/lib/python3.6/site-packages/shap/common.py in match_model_to_data(model, data)
80 out_val = model.f(data.convert_to_df())
81 else:
---> 82 out_val = model.f(data.data)
83 except:
84 print("Provided model function fails when applied to the provided data set.")
~/anaconda3/lib/python3.6/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
116
117 # lambda, but not partial, allows help() to work with update_wrapper
--> 118 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
119 # update the docstring of the returned function
120 update_wrapper(out, self.fn)
~/anaconda3/lib/python3.6/site-packages/sklearn/pipeline.py in predict_proba(self, X)
379 for name, transform in self.steps[:-1]:
380 if transform is not None:
--> 381 Xt = transform.transform(Xt)
382 return self.steps[-1][-1].predict_proba(Xt)
383
~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in transform(self, X)
491
492 X = _check_X(X)
--> 493 Xs = self._fit_transform(X, None, _transform_one, fitted=True)
494 self._validate_output(Xs)
495
~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
391 _get_column(X, column), y, weight)
392 for _, trans, column, weight in self._iter(
--> 393 fitted=fitted, replace_strings=True))
394 except ValueError as e:
395 if "Expected 2D array, got 1D array instead" in str(e):
~/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
~/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
752 tasks = BatchedCalls(itertools.islice(iterator, batch_size),
753 self._backend.get_nested_backend(),
--> 754 self._pickle_cache)
755 if len(tasks) == 0:
756 # No more tasks available in the iterator: tell caller to stop.
~/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __init__(self, iterator_slice, backend_and_jobs, pickle_cache)
208
209 def __init__(self, iterator_slice, backend_and_jobs, pickle_cache=None):
--> 210 self.items = list(iterator_slice)
211 self._size = len(self.items)
212 if isinstance(backend_and_jobs, tuple):
~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
390 delayed(func)(clone(trans) if not fitted else trans,
391 _get_column(X, column), y, weight)
--> 392 for _, trans, column, weight in self._iter(
393 fitted=fitted, replace_strings=True))
394 except ValueError as e:
~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in _get_column(X, key)
609 return X.loc[:, key]
610 else:
--> 611 raise ValueError("Specifying the columns using strings is only "
612 "supported for pandas DataFrames")
613 else:
ValueError: Specifying the columns using strings is only supported for pandas DataFrames
任何人都请帮助我。我真的很坚持。 x_train和x_test都是熊猫数据框。