我有以下代码,该代码生成一个Seaborn剥离图,然后将观测值的数量写在每个类别下。如果我在stripplot()调用中指定了不同的类别顺序,则数字会乱序。
我需要帮助找出一种方法来对我的nob系列进行排序,以使数字与cat_order中类别的顺序匹配。有没有一种优雅的方法可以做到这一点?
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# create some fake data
df = pd.DataFrame(data=None,columns=['Fuel','MW'])
df = df.astype(dtype={'Fuel':'category', 'MW':'float'})
rnd = np.random.RandomState(1)
mymean = 4 # starting mean value for use in the distribution below
mysigma = 0.8 # sigma value for use in the distribution below
myfuels = ['Bio','Coal','Hydro','Gas','Oil','Other','Solar','Wind'] # categories
mysize = 10
for myfuel in myfuels:
myMWs = list(rnd.lognormal(mean=mymean,sigma=mysigma,size=mysize)) # random generated data
newdata = pd.DataFrame({'Fuel': [myfuel]*mysize,
'MW': myMWs})
df = df.append(newdata,
ignore_index=True
)
mymean = mymean + 0.4 # increment to have different distributions
mysize = mysize + 3
# see distribution of created data
# Set up figure and axes
fig1, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(14,5))
sns.set_style('darkgrid')
# define category order (same as myfuels, except Other is at end)
cat_order = ['Bio','Coal','Hydro','Gas','Oil','Solar','Wind','Other']
sns.stripplot(x = 'Fuel',
y = 'MW',
data=df,
order=cat_order,
jitter=0.35,
size=2.5,
ax = ax1,
palette='dark'
)
ax1.set_ylim([-400,5500])
# add tags for number of observations in each category
nobs = df.groupby(['Fuel'])['MW'].agg(['count'])['count'] #Is there a more elegant form of this line?
myv = -280 # get this value through inspection and trial and error
myh = 0 # first category is always at horiz position = 0
for item in nobs:
if myh == 0:
plt.text(myh, myv, 'n = ' + str(item), ha='center', fontsize=9)
else:
plt.text(myh, myv, item, ha='center', fontsize=9)
myh = myh + 1 # move one unit to the right for each category
plt.show()
这是当前输出(带有错误的观察次数指示)。