答案 0 :(得分:1)
需要先将数据转换为长格式，才能使用 `hue` 参数进行 seaborn 绘制。
`pandas.wide_to_long` 需要一个唯一标识每一行的 `id` 列。
要转换的列名必须以相同的 `stubnames` 开头，这就是为什么我将 `error` 移到列名的前面的原因。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Set up reproducible sample data: three "mod_*" columns plus train/test scores.
np.random.seed(365)
data = {
    'mod_lg': np.random.normal(0.3, .1, size=(30,)),
    'mod_rf': np.random.normal(0.05, .01, size=(30,)),
    'mod_bg': np.random.normal(0.02, 0.002, size=(30,)),
    'mean_train_score': np.random.normal(0.95, 0.3, size=(30,)),
    'mean_test_score': np.random.normal(0.86, 0.5, size=(30,)),
}
df = pd.DataFrame(data)

# The derived columns are named with a leading "error" so that "error" can
# serve as the stubname when these columns are reshaped with wide_to_long.
df['error_mean_test_score'] = 1 - df['mean_test_score']
df['error_mean_train_score'] = 1 - df['mean_train_score']

# wide_to_long needs a column that uniquely identifies each row.
df["id"] = df.index

# Collapse mod_lg / mod_rf / mod_bg into a single "mod" value column, with the
# part after "mod_" stored in "mode". The suffix pattern is a raw string so
# that "\D" reaches the regex engine instead of being an invalid str escape.
df = pd.wide_to_long(
    df, stubnames='mod', i='id', j='mode', sep='_', suffix=r'\D+'
).reset_index()

# After the reshape each original id occurs once per mode, so rebuild a
# unique id from the fresh index.
df["id"] = df.index
# display dataframe: this is probably what your dataframe looks like to generate your current plots
id mode mean_train_score error_mean_test_score mean_test_score error_mean_train_score mod
0 0 lg 0.663855 -0.343961 1.343961 0.336145 0.316792
1 1 lg 0.990114 0.472847 0.527153 0.009886 0.352351
2 2 lg 1.179775 0.324748 0.675252 -0.179775 0.381738
3 3 lg 0.693155 0.519526 0.480474 0.306845 0.470385
4 4 lg 1.191048 -0.128033 1.128033 -0.191048 0.085305
`error_score_name` 列包含来自 `error_mean_test_score` 和 `error_mean_train_score` 的名称后缀（即 `mean_test_score` 和 `mean_train_score`），
`error_score_value` 列包含对应的值。
# convert df error columns to long format
# Reshape the error_* columns of df into long format: the part of each column
# name after "error_" goes into "score" and the values go into "error".
# The suffix pattern is a raw string so that "\D" is passed to the regex
# engine rather than treated as an (invalid) string escape.
# reset_index(level=1) moves "score" out of the index and keeps "id" as index.
dfl = pd.wide_to_long(
    df, stubnames='error', i='id', j='score', sep='_', suffix=r'\D+'
).reset_index(level=1)

# Give the reshaped columns descriptive names for plotting.
dfl = dfl.rename(columns={'score': 'error_score_name', 'error': 'error_score_value'})
# display dfl
error_score_name mean_train_score mod mean_test_score mode error_score_value
id
0 mean_test_score 0.663855 0.316792 1.343961 lg -0.343961
1 mean_test_score 0.990114 0.352351 0.527153 lg 0.472847
2 mean_test_score 1.179775 0.381738 0.675252 lg 0.324748
3 mean_test_score 0.693155 0.470385 0.480474 lg 0.519526
4 mean_test_score 1.191048 0.085305 1.128033 lg -0.128033
# Draw one box per mode from the long-format frame dfl, split by
# error_score_name through the hue parameter.
sns.boxplot(
    data=dfl,
    x='mode',
    y='error_score_value',
    hue='error_score_name',
)