Question

我用 seaborn 制作了一个分组箱线图。图中有两个子图，分别描述两种不同类型的数据。为了同时比较这两种类型（我希望保持分组不变），我想在类型 1 的箱线图上绘制类型 2 数据框的中位数，反之亦然。这是我的脚本：

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import netCDF4 as nc
# Global seaborn look: tick-style axes with the pastel colour palette.
sns.set_theme(style='ticks', palette='pastel')

# One wide figure; all subplot margins and spacings are set in a single call.
fig = plt.figure(figsize=(15, 5))
fig.subplots_adjust(left=0.12, right=0.98, bottom=0.1, top=0.98,
                    wspace=0.15, hspace=0.12)

# Text rendering and font sizes, set before the axes are created below.
plt.rcParams.update({
    'text.usetex': False,
    'axes.labelsize': 12,
    'font.size': 11,
    'legend.fontsize': 12,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
})

# Two panels in a single row: ax1 is the left one, ax2 the right one.
ax1, ax2 = (fig.add_subplot(1, 2, position) for position in (1, 2))

def _overlay_group_medians(ax, df_long, models, methods, group_width=0.8):
    """Mark the per-(Model, Method) medians of *df_long* on the boxes of *ax*.

    One red star is drawn per box, at the horizontal centre of the box that
    belongs to the same model and method.

    Parameters
    ----------
    ax : matplotlib Axes that already holds a hue-grouped seaborn boxplot
        with ``x='Model'`` and ``hue='Method'``.
    df_long : long-form DataFrame with columns 'Model', 'Method' and 'var'
        whose medians are to be shown.
    models, methods : category orders used on the x axis and for the hue.
    group_width : total width occupied by all boxes of one x category;
        0.8 is the documented default of ``sns.boxplot(width=...)``.
    """
    n_methods = len(methods)
    box_width = group_width / n_methods
    # Horizontal distance of each hue box centre from its category tick:
    # the boxes of one category are laid out side by side, centred on the tick.
    offsets = [(k - (n_methods - 1) / 2) * box_width for k in range(n_methods)]

    # Selecting the 'var' column keeps the string columns out of the median.
    medians = df_long.groupby(['Model', 'Method'])['var'].median()

    xs, ys = [], []
    for tick, model in zip(ax.get_xticks(), models):
        for offset, method in zip(offsets, methods):
            xs.append(tick + offset)
            ys.append(medians.loc[(model, method)])

    # zorder=3 so the markers are not hidden behind the box artists.
    ax.scatter(xs, ys, s=20, marker='*', color='red', zorder=3)


def grouped_boxplot(axis_type1, axis_type2):
    """Draw grouped boxplots for two data types and cross-mark their medians.

    Random demo data (10 integers in [1, 100] per model and method) is
    generated for both types.  Type 1 is drawn on *axis_type1* and type 2 on
    *axis_type2*, grouped by model on the x axis with one box per method.
    On every box of one type a red star marks the median of the matching
    (model, method) group of the *other* type, so the types can be compared.

    Parameters
    ----------
    axis_type1 : matplotlib Axes receiving the type 1 boxplot.
    axis_type2 : matplotlib Axes receiving the type 2 boxplot and the legend.
    """
    methods = ['m1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7']
    models = ['model1', 'model2', 'model3', 'model4']

    # Build one wide frame per model and type; columns are the methods.
    frames_type1, frames_type2 = [], []
    for model in models:
        wide_type1 = pd.DataFrame()
        wide_type2 = pd.DataFrame()
        for m in methods:
            wide_type1[m] = np.random.randint(1, 101, 10)
            wide_type2[m] = np.random.randint(1, 101, 10)
        frames_type1.append(wide_type1.assign(Model=model))
        frames_type2.append(wide_type2.assign(Model=model))

    # Long form: one row per (Model, Method, var) observation.
    df_type1_long = pd.melt(pd.concat(frames_type1), 'Model',
                            var_name='Method', value_name='var')
    df_type2_long = pd.melt(pd.concat(frames_type2), 'Model',
                            var_name='Method', value_name='var')

    for ax, df_long in ((axis_type1, df_type1_long),
                        (axis_type2, df_type2_long)):
        sns.boxplot(x='Model', hue='Method', y='var', data=df_long,
                    order=models, hue_order=methods,
                    showfliers=False, whis=0, ax=ax)

    # Cross-overlay: type 2 medians on the type 1 panel and vice versa.
    _overlay_group_medians(axis_type1, df_type2_long, models, methods)
    _overlay_group_medians(axis_type2, df_type1_long, models, methods)

    # A single shared legend: hide the left one, place the right one below.
    axis_type1.legend([], [], frameon=False)
    axis_type2.legend(loc='lower center', bbox_to_anchor=(-0.2, -0.25),
                      ncol=len(methods))

# Fill both panels: type 1 goes on ax1, type 2 on ax2.
grouped_boxplot(axis_type1=ax1, axis_type2=ax2)

# Display the figure interactively; the line below is the file-output
# alternative.
plt.show()
# plt.savefig('the_ultimate_boxplot.pdf')

我已经设法把中位数绘制在箱线图的 xtick 位置上。

有没有办法让每个符号落在对应的箱体上？例如：把类型 2 中模型 1 的 m1 的中位数标在类型 1 中模型 1 的 m1（蓝色箱线图）上，把类型 2 中模型 1 的 m2 的中位数标在类型 1 中模型 1 的 m2（橙色箱线图）上，[...] 依此类推？

感谢您的帮助！

Answer 1

sns.pointplot 可用于计算和定位中位数。

示例代码对 pointplot 使用以下参数：

dodge=.8 - .8 / len(methods)：dodge 用于把各个 hue 对应的点分开。pointplot 与 boxplot 的默认 dodge 宽度不同，参见这个 GitHub issue。
linestyles=''：不要在点之间画线
markers='D'：使用菱形标记
color='black'：标记的颜色（默认颜色来自 hue）
estimator=np.median：计算y值的中位数；请注意，它们与箱线图的中心线位于同一位置
ci=None：不显示置信区间

图例已修改，去掉了 pointplot 产生的条目。bbox_to_anchor 的 x 位置设置为 wspace 的一半（取负值），以便把图例大致放在两个子图之间。

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

sns.set_theme(style='ticks', palette='pastel')

fig = plt.figure(figsize=(15, 5))
fig.subplots_adjust(wspace=0.15, right=0.98, left=0.04, bottom=0.14, top=0.98)

axis_type1 = fig.add_subplot(1, 2, 1)
axis_type2 = fig.add_subplot(1, 2, 2)
methods = ['m1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7']
models = ['model1', 'model2', 'model3', 'model4']

# Random long-form demo data: one row per (Model, Method, var) observation.
df_type1_long = pd.DataFrame({'Model': np.random.choice(models, 500),
                              'Method': np.random.choice(methods, 500),
                              'var': np.random.randint(1, 101, 500)})
df_type2_long = pd.DataFrame({'Model': np.random.choice(models, 800),
                              'Method': np.random.choice(methods, 800),
                              'var': np.random.randint(1, 101, 800)})

box_legend_entries = []
for df_long, ax in zip([df_type1_long, df_type2_long], [axis_type1, axis_type2]):
    # Explicit order/hue_order: the categories are drawn at random, so without
    # them each panel would order models and colour methods by first
    # appearance, and the single shared legend could mismatch the left panel.
    sns.boxplot(x='Model', hue='Method', y='var', data=df_long,
                order=models, hue_order=methods,
                showfliers=False, whis=0, ax=ax)
    # Snapshot the legend entries now, while only the boxes are on the axes,
    # so the pointplot entries added next never reach the final legend.
    box_legend_entries.append(ax.get_legend_handles_labels())
    # Black diamonds at the per-group medians.  The dodge value spreads the
    # points over the same width as the boxes.
    # NOTE: seaborn >= 0.12 prefers errorbar=None over ci=None.
    # To mark another statistic (e.g. the minimum), add a second pointplot
    # with estimator=np.min and a different marker such as 'v'.
    sns.pointplot(x='Model', hue='Method', y='var', data=df_long,
                  order=models, hue_order=methods,
                  dodge=.8 - .8 / len(methods),
                  linestyles='', markers='D', color='black',
                  estimator=np.median, ci=None, ax=ax)
    ax.set_xlabel('')

# One shared legend below the figure, built from the right panel's box entries.
axis_type1.legend_.remove()
handles, labels = box_legend_entries[-1]
axis_type2.legend(handles=handles, labels=labels,
                  loc='upper center', bbox_to_anchor=(-0.075, -0.06), ncol=len(methods))
plt.show()

每个箱线图上的 Seaborn 分组箱线图符号

1 个答案: