How to use the SHAP KernelExplainer with a Pipeline model

Date: 2019-05-23 16:15:01

Tags: python-3.x machine-learning data-science

I have a pandas DataFrame X and I need to explain the predictions of a particular model. My model is the following pipeline:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

pipeline = Pipeline(steps=[
    # get_imputer builds the imputation step for the different feature types
    ('imputer', get_imputer(
        categorical_features=categorical_features,
        real_features=real_features,
        int_features=int_features,
    )),
    ('classifier', RandomForestClassifier(criterion='gini', class_weight='balanced')),
])
print(int_features)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)
y_pred = pipeline.fit(x_train, y_train).predict(x_test)

# f1_score(y_test, y_pred)
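get_imputer itself is not shown in the question, but judging from its arguments and the ColumnTransformer frames in the traceback below, it presumably returns a ColumnTransformer that selects columns by their string names. A minimal sketch of what such a helper might look like (the imputation strategies chosen here are assumptions, purely for illustration):

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

def get_imputer(categorical_features, real_features, int_features):
    # Columns are referenced by name, which only works when the input
    # reaching the transformer is a pandas DataFrame - this is exactly
    # what the ValueError further down complains about.
    return ColumnTransformer(transformers=[
        ('categorical', SimpleImputer(strategy='most_frequent'), categorical_features),
        ('real', SimpleImputer(strategy='mean'), real_features),
        ('int', SimpleImputer(strategy='median'), int_features),
    ])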

Now, to explain the predictions, I am using SHAP's KernelExplainer as follows:

import shap

# use Kernel SHAP to explain test set predictions
shap.initjs()

explainer = shap.KernelExplainer(pipeline.predict_proba, x_train, link="logit")

shap_values = explainer.shap_values(x_test, nsamples=10)

# plot the SHAP values for the first class output of the first instance
shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], x_test.iloc[0, :], link="logit")

When I run this code I get the following error message:

  ValueError: Specifying the columns using strings is only supported for pandas DataFrames

  Provided model function fails when applied to the provided data set.

    ---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
    <ipython-input-396-50cda7e0af8e> in <module>
          2 shap.initjs()
          3 # x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)
    ----> 4 explainer = shap.KernelExplainer(pipeline.predict_proba, x_train, link="logit")
          5 shap_values = explainer.shap_values(x_test, nsamples=10)

~/anaconda3/lib/python3.6/site-packages/shap/explainers/kernel.py in __init__(self, model, data, link, **kwargs)
     95         self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
     96         self.data = convert_to_data(data, keep_index=self.keep_index)
---> 97         model_null = match_model_to_data(self.model, self.data)
     98 
     99         # enforce our current input type limitations

~/anaconda3/lib/python3.6/site-packages/shap/common.py in match_model_to_data(model, data)
     80             out_val = model.f(data.convert_to_df())
     81         else:
---> 82             out_val = model.f(data.data)
     83     except:
     84         print("Provided model function fails when applied to the provided data set.")

~/anaconda3/lib/python3.6/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
    116 
    117         # lambda, but not partial, allows help() to work with update_wrapper
--> 118         out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
    119         # update the docstring of the returned function
    120         update_wrapper(out, self.fn)

~/anaconda3/lib/python3.6/site-packages/sklearn/pipeline.py in predict_proba(self, X)
    379         for name, transform in self.steps[:-1]:
    380             if transform is not None:
--> 381                 Xt = transform.transform(Xt)
    382         return self.steps[-1][-1].predict_proba(Xt)
    383 

~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in transform(self, X)
    491 
    492         X = _check_X(X)
--> 493         Xs = self._fit_transform(X, None, _transform_one, fitted=True)
    494         self._validate_output(Xs)
    495 

~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
    391                               _get_column(X, column), y, weight)
    392                 for _, trans, column, weight in self._iter(
--> 393                     fitted=fitted, replace_strings=True))
    394         except ValueError as e:
    395             if "Expected 2D array, got 1D array instead" in str(e):

~/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 

~/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    752             tasks = BatchedCalls(itertools.islice(iterator, batch_size),
    753                                  self._backend.get_nested_backend(),
--> 754                                  self._pickle_cache)
    755             if len(tasks) == 0:
    756                 # No more tasks available in the iterator: tell caller to stop.

~/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __init__(self, iterator_slice, backend_and_jobs, pickle_cache)
    208 
    209     def __init__(self, iterator_slice, backend_and_jobs, pickle_cache=None):
--> 210         self.items = list(iterator_slice)
    211         self._size = len(self.items)
    212         if isinstance(backend_and_jobs, tuple):

~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
    390                 delayed(func)(clone(trans) if not fitted else trans,
    391                               _get_column(X, column), y, weight)
--> 392                 for _, trans, column, weight in self._iter(
    393                     fitted=fitted, replace_strings=True))
    394         except ValueError as e:

~/anaconda3/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in _get_column(X, key)
    609             return X.loc[:, key]
    610         else:
--> 611             raise ValueError("Specifying the columns using strings is only "
    612                              "supported for pandas DataFrames")
    613     else:

ValueError: Specifying the columns using strings is only supported for pandas DataFrames

Can anyone please help me? I am really stuck here. Both x_train and x_test are pandas DataFrames.
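For context, the final ValueError comes from the ColumnTransformer inside the pipeline: KernelExplainer converts the data it is given to a plain numpy array before calling the model function (the out_val = model.f(data.data) frame above), so the string-based column selection no longer sees a DataFrame. One common workaround is to wrap predict_proba so the array is turned back into a DataFrame first. A minimal sketch, assuming x_train's column order matches the arrays SHAP passes in; the wrapper name predict_fn is just for illustration:

import pandas as pd
import shap

def predict_fn(data):
    # Rebuild a DataFrame so the ColumnTransformer can select columns by name.
    # With mixed dtypes the rebuilt frame may also need its dtypes restored.
    return pipeline.predict_proba(pd.DataFrame(data, columns=x_train.columns))

explainer = shap.KernelExplainer(predict_fn, x_train, link="logit")
shap_values = explainer.shap_values(x_test, nsamples=10)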

0 Answers:

There are no answers yet.