我正在尝试绘制RandomForestRegressor模型的feature_importances_。但是,我的数据集(经过OneHotEncoding之后)有307个特征,把所有特征都画出来既不美观,也难以阅读。
我怎样才能只绘制重要性排名前5(或前10)的特征?
这是我的实际代码:
# Helper function to plot feature importances
def plot_feature_importances(model_to_plot, features_list, x_train_set):
    """Plot the importance of every feature and print the ranking.

    Draws one bar per entry of ``model_to_plot.feature_importances_`` and then
    prints the features ordered from most to least important.

    Args:
        model_to_plot: Fitted estimator exposing ``feature_importances_``.
        features_list: Tick labels, one per feature, in the same order as
            ``model_to_plot.feature_importances_``.
        x_train_set: Object whose ``columns`` attribute supplies the feature
            names used in the printed ranking (presumably the training
            DataFrame -- confirm against the caller).
    """
    importances = model_to_plot.feature_importances_
    positions = np.arange(len(features_list))

    print("Features sorted by their score:")

    # 'normal' is not a font family (it is a weight/style); matplotlib warns
    # and falls back to the default sans-serif font, so request that directly.
    font = {
        'family': 'sans-serif',
        'weight': 'normal',
        'size': 12,
    }
    plt.rc('font', **font)

    # Plot the importance of the individual features.
    plt.figure(figsize=(15, 7))
    plt.bar(positions, importances, width=0.5, color="blue", align='center')
    plt.title('Features importance in decision making', position=(.5, 1.05), fontsize=20)
    # Set tick positions, labels and rotation in one call; setting labels
    # before the positions are fixed is overridden (and warned about).
    plt.xticks(positions, features_list, fontsize=12, rotation=60)
    plt.yticks(fontsize=12)
    plt.ylabel('Relative Information %', fontsize=15)
    plt.xlabel('Features', fontsize=15)
    plt.show()

    print("Feature ranking:")
    # Indices of the features, most important first.
    indices = np.argsort(importances)[::-1]
    # Use the x_train_set parameter, not a global, for the feature names.
    for rank, idx in enumerate(indices, start=1):
        print("%d. Feature %s (%.2f)" % (rank, x_train_set.columns[idx], importances[idx]))
并使用以下代码进行绘图,得到的结果如下:
# Example call: `model`, `features` and `x_train` must already be defined
# by the surrounding script.
plot_feature_importances(model, features, x_train)
答案 0 :(得分:0)
您没有提供最小、完整且可验证的示例(MCVE),因此我无法给出经过实际运行验证的最终答案。不过,您可以尝试下面修改后的代码。我删除了原来设置x-ticks的那几行,不过那部分无关紧要。
def plot_feature_importances(model_to_plot, features_list, x_train_set, to_plot=5):
    """Plot the ``to_plot`` most important features and print their ranking.

    Args:
        model_to_plot: Fitted estimator exposing ``feature_importances_``.
        features_list: Tick labels, one per feature, in the same order as
            ``model_to_plot.feature_importances_``. Any positionally
            indexable sequence works (list, array, ...).
        x_train_set: Object whose ``columns`` attribute supplies the feature
            names used in the printed ranking (presumably the training
            DataFrame -- confirm against the caller).
        to_plot: Number of top features to show. Values larger than the
            number of features are clamped; defaults to 5.
    """
    importances = np.asarray(model_to_plot.feature_importances_)
    # Never ask for more bars than there are features (or fewer than zero).
    to_plot = max(0, min(to_plot, len(importances)))
    # Indices of the top features, most important first.
    indices = np.argsort(importances)[::-1][:to_plot]
    # Index element-wise so a plain Python list of names works as well.
    labels = [features_list[i] for i in indices]
    positions = range(len(indices))

    # 'normal' is not a font family (it is a weight/style); matplotlib warns
    # and falls back to the default sans-serif font, so request that directly.
    font = {
        'family': 'sans-serif',
        'weight': 'normal',
        'size': 12,
    }
    plt.rc('font', **font)

    plt.figure(figsize=(15, 7))
    plt.bar(positions, importances[indices], width=0.5, color="blue", align='center')
    plt.xticks(positions, labels, fontsize=12)
    plt.title('Features importance in decision making', position=(.5, 1.05), fontsize=20)
    plt.yticks(fontsize=12)
    plt.ylabel('Relative Information %', fontsize=15)
    plt.xlabel('Features', fontsize=15)
    plt.show()

    # Only the selected features are ranked; looping over every column would
    # run past the end of `indices`.
    for rank, idx in enumerate(indices, start=1):
        print("%d. Feature %s (%.2f)" % (rank, x_train_set.columns[idx], importances[idx]))
# Example call: `model`, `features` and `x_train` must already be defined
# by the surrounding script.
plot_feature_importances(model, features, x_train)