代码如下:
# coding=utf-8 from __future__ import print_function
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import sklearn
import sklearn.datasets
import sklearn.ensemble
import numpy as np
import lime
import lime.lime_tabular
class Randomforest:
    """Thin wrapper around a scikit-learn ``RandomForestClassifier``.

    The instance keeps the most recent train/test split and the fitted
    classifier so that both can be reused after training, for example as the
    probability function handed to an explainer.
    """

    def __init__(self):
        # Fitted classifier. Both names refer to the same object once
        # random_forest_classifier() has been called; None until then.
        self.trained_model = None
        self.clf = None
        # Filled in by split_dataset(); None until then.
        self.train_x = None
        self.test_x = None
        self.train_y = None
        self.test_y = None

    def split_dataset(self, dataset, train_percentage, feature_headers, target_header):
        """Split *dataset* into train and test parts and remember them.

        Args:
            dataset: Object indexable by column name(s); it is indexed with
                ``feature_headers`` and with ``target_header``.
            train_percentage: Forwarded to ``train_test_split`` as
                ``train_size``.
            feature_headers: Column names used as model inputs.
            target_header: Column name used as the label.

        Returns:
            The tuple ``(train_x, test_x, train_y, test_y)``, which is also
            stored on the instance under the same attribute names.
        """
        self.train_x, self.test_x, self.train_y, self.test_y = train_test_split(
            dataset[feature_headers],
            dataset[target_header],
            train_size=train_percentage,
        )
        return self.train_x, self.test_x, self.train_y, self.test_y

    def random_forest_classifier(self, features, target):
        """Fit a default ``RandomForestClassifier`` and return it.

        Args:
            features: Training inputs passed to ``fit``.
            target: Training labels passed to ``fit``.

        Returns:
            The fitted classifier, also available as ``self.clf`` and
            ``self.trained_model``.
        """
        self.clf = RandomForestClassifier()
        self.clf.fit(features, target)
        # Keep the attribute declared in __init__ in sync with the fitted model.
        self.trained_model = self.clf
        return self.clf

    def predictProba(self, input):  # noqa: A002 - name kept for existing callers
        """Return ``self.clf.predict_proba(input)``.

        ``random_forest_classifier()`` must have been called first; otherwise
        ``self.clf`` is still None and the attribute lookup fails.
        """
        return self.clf.predict_proba(input)

    def predict_proba(self, input):  # noqa: A002 - name kept for existing callers
        """snake_case alias of :meth:`predictProba`."""
        return self.predictProba(input)
# Train a random forest on the CSV data and build a LIME tabular explainer
# for it.
rf = Randomforest()

# Column names used to index the loaded CSV. The first entry is skipped when
# selecting features and the last entry is the target column.
Headers = [
    "vectorName", "abmessungen_Lange", "starrflugler", "tragflachen",
    "triebwerke", "rumpf", "leitwerk", "drehflugler",
    "drehflugler_Rumpf_Cockpit", "doppeldecker",
    "tragflachen_Stellung_Gerade", "hochDecker", "triebwerke_triebwerksart",
    "rumpf_Rumpfformen", "drehflugler_Rotor", "drehflugler_Triebwerk",
    "drehflugler_Rumpf", "drehflugler_Heckausleger",
    "drehflugler_Triebwerk_Lufteinlass", "drehflugler_Triebwerk_Luftauslass",
    "result",
]

dataset = pd.read_csv("filename.csv")

feature_names = Headers[1:-1]
train_x, test_x, train_y, test_y = rf.split_dataset(
    dataset, 0.7, feature_names, Headers[-1]
)
trained_model = rf.random_forest_classifier(train_x, train_y)
predictions = trained_model.predict(test_x)

# NOTE(review): LIME expects class_names in the classifier's own class order
# (trained_model.classes_) - confirm that '1' really comes before '0' here.
class_names = ['1', '0']

# Pass the underlying NumPy array, not the DataFrame: LIME's discretizer
# indexes the training data as data[:, feature], which on a DataFrame raises
# "TypeError: unhashable type".
explainer = lime.lime_tabular.LimeTabularExplainer(
    train_x.values,
    feature_names=feature_names,
    class_names=class_names,
    categorical_features=None,
    categorical_names=None,
    discretize_continuous=True,
    kernel_width=3,
)
错误如下:
Traceback (most recent call last):
File "/home/veerap/PycharmProjects/RandomForest/rf.py", line 79, in <module>
categorical_names=None, categorical_features=None, kernel_width=3)
File "/home/veerap/.local/lib/python2.7/site-packages/lime/lime_tabular.py", line 164, in __init__
self.feature_names, labels=training_labels)
File "/home/veerap/.local/lib/python2.7/site-packages/lime/discretize.py", line 125, in __init__
random_state=random_state)
File "/home/veerap/.local/lib/python2.7/site-packages/lime/discretize.py", line 46, in __init__
bins = self.bins(data, labels)
File "/home/veerap/.local/lib/python2.7/site-packages/lime/discretize.py", line 130, in bins
qts = np.array(np.percentile(data[:, feature], [25, 50, 75]))
File "/home/veerap/.local/lib/python2.7/site-packages/pandas/core/frame.py", line 2139, in __getitem__
return self._getitem_column(key)
File "/home/veerap/.local/lib/python2.7/site-packages/pandas/core/frame.py", line 2146, in _getitem_column
return self._get_item_cache(key)
File "/home/veerap/.local/lib/python2.7/site-packages/pandas/core/generic.py", line 1840, in _get_item_cache
res = cache.get(item)
TypeError: unhashable type
运行上述代码,在创建 LIME 解释器(LimeTabularExplainer)时出现此错误。我不确定这是 pandas 包的错误还是 lime 包的错误。我尝试了很多方法来调试,比如向 LimeTabularExplainer 传入数据帧而不是列表等,但都没有成功。如果有人能尽快告诉我问题出在哪里,那就太好了。谢谢。
答案 0 :(得分:0)
(我认为)问题在于你传入的是 pandas 数据帧(DataFrame)。我也遇到过同样的问题。
最简单的方法是,在你的代码中把传给解释器的 train_x 替换为
train_x.values
希望这能有所帮助。祝好,