import sklearn
import sklearn.datasets
import sklearn.ensemble
import numpy as np
from treeinterpreter import treeinterpreter as ti
# Train a random forest on the iris dataset, then use treeinterpreter to
# split the prediction for one sample into a bias term and per-feature
# contributions.
iris = sklearn.datasets.load_iris()
rf = sklearn.ensemble.RandomForestClassifier(
    n_estimators=500,
    random_state=50,
)
rf.fit(iris.data, iris.target)

# Take sample 100 and reshape it into a one-row 2-D array for prediction.
instances = iris.data[100].reshape(1, -1)
prediction, biases, contributions = ti.predict(rf, instances)

for i in range(len(instances)):
    named_contributions = zip(contributions[i], iris.feature_names)
    # NOTE(review): `.any()` reduces each contribution row to one boolean
    # before `abs`/`~` are applied, so this key does not rank features by
    # the size of their contribution; rows that share a key keep their
    # original relative order because `sorted` is stable.
    ordered = sorted(
        named_contributions,
        key=lambda pair: ~abs(pair[0].any()),
    )
    for c, feature in ordered:
        print(feature, c)
我想打印此列表中最大值所对应的特征名称,但得到的却是 True。
有人知道这是为什么,以及该如何解决吗?
您可以直接复制/粘贴上面的代码,在自己的环境中运行。
我对问题稍作了修改:现在要打印的是最大值所在特征(列)的名称,而不是最大值本身。
输出
Feature contributions:
--------------------
sepal length (cm) [-0.046 -0.01 0.057]
sepal width (cm) [-0. -0. 0.]
petal length (cm) [-0.136 -0.153 0.289]
petal width (cm) [-0.148 -0.171 0.319]
我期望的输出
petal width (cm)
答案 0 :(得分:1)
如果要获取数组中的最大元素,应该使用 c.max() 而不是 c.all()。下面这段代码可以得到您想要的结果:
# For each explained instance, record the name of the feature whose
# contribution row contains the largest single value (`c.max()`).
# Relies on `contributions` and `iris` from the script above; assumes
# `contributions` holds one entry per row of `instances` -- TODO confirm.
maxFeatures = []
for instance_contributions in contributions:
    # Start below every possible value so that an instance whose
    # contributions are all zero or negative still yields a feature name
    # (starting at 0 would leave the name empty in that case).
    best_value = float("-inf")
    best_feature = ''
    # No sorting is needed to find a maximum; the features are visited in
    # the order given by `iris.feature_names`.
    for c, feature in zip(instance_contributions, iris.feature_names):
        row_peak = c.max()
        if row_peak > best_value:
            best_value = row_peak
            best_feature = feature
        print(feature, c)
    maxFeatures.append(best_feature)
print(maxFeatures)