如何在seaborn图中用中值标记每个箱图?
例如:
# Minimal example from the question: one box per day of the week.
import seaborn as sns

sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
# Keep the returned object around so annotations can be added to the plot.
ax = sns.boxplot(
    data=tips,
    x="day",
    y="total_bill",
)
如何用中位数或平均值标记每个箱图?
答案 0 :(得分:22)
我得说,我很喜欢大家在提问时附上示例数据集。给你一个大大的+1!
import numpy as np
import seaborn as sns

sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
ax = sns.boxplot(x="day", y="total_bill", data=tips)

# Median total bill per day, in the order produced by groupby.
# NOTE(review): assumes this order matches the left-to-right order of the
# boxes on the x axis - confirm for your data.
medians = tips.groupby(['day'])['total_bill'].median().values
median_labels = [str(np.round(m, 2)) for m in medians]

# Boxes are assumed to sit at x = 0, 1, 2, ...; each label is written
# slightly above its median so it does not overlap the median line.
for x_pos, (median, label) in enumerate(zip(medians, median_labels)):
    ax.text(x_pos, median + 0.5, label,
            horizontalalignment='center', size='x-small',
            color='w', weight='semibold')
答案 1 :(得分:3)
基于ShikjarDua的出色方法,我创建了一个不依赖刻度位置的版本。在处理Seaborn中的分组数据(即使用hue=参数)时,这很方便。此外,我为离群点(fliers)添加了一个显式开关,它会改变每个箱体所绘制的线条数量。
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
def main():
    """Draw the grouped tips box plot and label every box with its median."""
    sns.set_style("whitegrid")
    dataset = sns.load_dataset("tips")

    # Outlier markers are optional; the labelling helper is told whether
    # they are drawn because that changes how many lines each box has.
    draw_fliers = False

    axes = sns.boxplot(
        data=dataset,
        x="day",
        y="total_bill",
        hue="sex",
        showfliers=draw_fliers,
    ).axes
    create_median_labels(axes, draw_fliers)
    plt.show()
def create_median_labels(ax, has_fliers):
    """Write each box's median value as a text label on its median line.

    Median lines are located by their position in ``ax.get_lines()`` rather
    than by tick position, so boxes split by ``hue`` are labelled as well.

    Args:
        ax: Axes holding the box plot; must provide ``get_lines()`` and
            ``text()``.
        has_fliers: Whether the plot was drawn with fliers, which changes
            the number of lines belonging to each box.
    """
    lines = ax.get_lines()
    # The code relies on each box contributing 5 lines (6 when fliers are
    # drawn), with the median line at offset 4 inside each group.
    lines_per_box = 5 + int(has_fliers)
    # Step through the median lines only; no dependence on tick positions.
    for median_line in lines[4::lines_per_box]:
        # Read the coordinates through the public Line2D accessors instead
        # of the private _x/_y attributes; orig=False yields the processed
        # numeric data that _x/_y hold.
        x_data = median_line.get_xdata(orig=False)
        y_data = median_line.get_ydata(orig=False)
        # Centre of the median line.
        mean_x = sum(x_data) / len(x_data)
        mean_y = sum(y_data) / len(y_data)
        text = ax.text(mean_x, mean_y, f'{mean_y:.1f}',
                       ha='center', va='center',
                       fontweight='bold', size=10, color='white')
        # Thin black outline keeps the white text readable on boxes of
        # any colour.
        text.set_path_effects([
            path_effects.Stroke(linewidth=3, foreground='black'),
            path_effects.Normal(),
        ])
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
答案 2 :(得分:2)
这也可以通过从图本身得出中位数而不用专门从数据中计算中位数来实现。
box_plot = sns.boxplot(x="day", y="total_bill", data=tips)
ax = box_plot.axes
lines = ax.get_lines()
categories = ax.get_xticks()

# Layout this snippet relies on: every box contributes 6 lines, and the
# median line sits at index 4 (the fifth line) within each group of 6.
lines_per_box = 6
median_offset = 4

# Use the running box index to pick the line and the tick location only as
# the x coordinate, so tick values are never used as list indices.
for box_index, x_pos in enumerate(categories):
    median_line = lines[median_offset + box_index * lines_per_box]
    # Presumably the median line is horizontal, so its first y value is
    # the median itself.
    y = round(median_line.get_ydata()[0], 1)
    ax.text(
        x_pos,
        y,
        f'{y}',
        ha='center',
        va='center',
        fontweight='bold',
        size=10,
        color='white',
        bbox=dict(facecolor='#445A64'))

box_plot.figure.tight_layout()