以下绘制了一个堆积的条形图,该条形图分为4个子图。从Area
调用这四个子图。从Result
中调用值。此列包含0和1。我想为Group
中的每个不同组合绘制这些值的总数。
这工作正常,但我希望使用辅助轴以线图形式显示标准化值。具体来说,是1与0的百分比。目前,我只需要总数0's
和1's
作为条形图。我希望使用辅助y轴绘制1's
的百分比。
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({
'Result' :[0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1],
'Group' :[-2,-1,1,0,0,-1,-1,0,1,-1,0,1,-1,1,0,1],
'Area' :['North','East','South','West','North','East','South','West','North','East','South','West','North','East','South','West'],
})
total = df['Result'].sum()
def custom_stacked_barplot(t, sub_df, ax):
plot_df = pd.crosstab(index = sub_df['Group'],
columns = sub_df['Result'],
values = sub_df['Result'],
aggfunc = ['count',(lambda x: sum(x)/total*100)],
)
p = plot_df.plot(kind = "bar", y = 'count',stacked = True, ax = ax, rot = 0, width = 0.6, legend = False)
ax2=ax.twinx()
#plot norm line
#r = plot_df.plot(y = '<lambda>', ax = ax2, legend = False, zorder = 2, color = 'black')
return p
g_dfs = df.groupby(['Area'])
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8,12))
for ax, (i,g) in zip(axes.ravel(), sorted(g_dfs)):
custom_stacked_barplot(i, g, ax)
plt.legend(bbox_to_anchor=(1.129, 2.56))
plt.show()
预期的df输出以绘图:
count perc
Result 0 1 0
Group
-1 1.0 2.0 0.66
1 0.0 1.0 1.0
count perc
Result 0 1 0
Group
-2 1.0 0.0 0.0
-1 0.0 1.0 1.0
0 1.0 0.0 0.0
1 0.0 1.0 1.0
count perc
Result 0 1 0
Group
-1 0.0 1.0 1.0
0 1.0 1.0 0.5
1 0.0 1.0 1.0
count perc
Result 0 1 0
Group
0 1.0 1.0 0.5
1 0.0 2.0 1.0
答案 0 :(得分:2)
尝试使用twinx()
import matplotlib.pyplot as plt
df = pd.DataFrame({
'Result' :[0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1],
'Group' :[-2,-1,1,0,0,-1,-1,0,1,-1,0,1,-1,1,0,1],
'Area' :['North','East','South','West','North','East','South','West','North','East','South','West','North','East','South','West'],
})
total = df['Result'].sum()
def custom_stacked_barplot(t, sub_df, ax):
plot_df = pd.crosstab(index = sub_df['Group'],
columns=sub_df['Result'],
values=sub_df['Result'],
aggfunc = ['count',(lambda x: sum(x)/total*100)])
print(plot_df)
p = plot_df.plot(kind="bar",y='count',stacked=True, ax = ax, rot = 0, width = 0.6, legend = False)
ax2=ax.twinx()
r = plot_df.plot(kind="bar",y='<lambda>', stacked=True, ax = ax2, rot = 0, width = 0.6, legend = False)
return p,r
g_dfs = df.groupby(['Area'])
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8,12))
for ax, (i,g) in zip(axes.ravel(), sorted(g_dfs)):
custom_stacked_barplot(i, g, ax)
plt.legend(bbox_to_anchor=(1.129, 2.56))
plt.show()
# save the plot as a file
fig.savefig('two_different_y_axis_for_single_python_plot_with_twinx.jpg',
format='jpeg',
dpi=100,
bbox_inches='tight')
plt.show()
答案 1 :(得分:1)
编辑:
def create_plot(ax, x, y1, y2, y3):
ax1 = ax
ax2 = ax1.twinx()
ax1.bar(x, y1)
ax1.bar(x, y2, bottom=y1)
ax2.plot(x, y3, c="C3")
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(8,12))
for ax in axes:
create_plot(ax, (1,2,3,4), (1,2,3,4), (7,5,3,1), (1,4,2,3))
plt.show()
类似
def create_plot(x, y1, y2, y3):
fig = plt.figure()
ax1 = fig.gca()
ax2 = ax1.twinx()
ax1.bar(x, y1)
ax1.bar(x, y2, bottom=y1)
ax2.plot(x, y3, c="C3")
return fig
fig = create_plot((1,2,3,4), (1,2,3,4), (7,5,3,1), (1,4,2,3))
plt.show()
满足您的需求?这给了我
答案 2 :(得分:1)
好,所以我也尝试了:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.DataFrame({
'Result' :[0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1],
'Group' :[-2,-1,1,0,0,-1,-1,0,1,-1,0,1,-1,1,0,1],
'Area' :['North','East','South','West','North','East','South','West','North','East','South','West','North','East','South','West'],
})
## iterate over unique areas
unique_areas = df['Area'].unique()
fig, axes = plt.subplots(nrows=len(unique_areas), ncols=1, figsize=(8,12))
twin_axes=[]
for i,key in enumerate(unique_areas):
# print(f"== {key} ==") #<- uncomment this line to debug
## first, filter the df by 'Area'
area_df = df[(df['Area']==key)]
## and do the crosstab:
ct_df = pd.crosstab(index=area_df['Group'],
columns=area_df['Result'],
)
## to add the 'count' label you wanted to the dataframe multiindex:
ct_df = pd.concat({'count': ct_df}, names=['type'],axis=1)
## now iterate over the unique 'Groups' in the index ...
for ix in ct_df.index:
sub_df = ct_df.loc[ix,'count']
## ... and calculate the contribution of each Result
# which is equal to '1' (ct_df.loc[ix,1])
# in the total for this group (ct_df.loc[ix].sum())
ct_df.loc[ix,'perc'] = sub_df.loc[1]/sub_df.sum()
# print(ct_df) #<- uncomment this line to debug
## add your stacked bar plot
bar = ct_df.plot(kind = "bar", y = 'count',stacked = True, ax = axes[i], rot = 0, width = 0.6, legend = False)
## keep the twin_axes in a separate list
twin_axes.append(axes[i].twinx())
## generate the "correct" x values that match the bar plot locations
# (i.e. use [0,1,2,3] instead of [-2,-1,0,1] )
xs=np.arange(0,len(ct_df),1)
## and plot the percentages as a function this new x range as a black line:
twin_axes[i].plot(xs,ct_df['perc'],zorder=2,color='black')
## optional:
# using these 'xs' you could also e.g. add some labels for the contained groups:
for x in xs:
twin_axes[i].text(x,1.15,ct_df.index[x],color="b")
# make some nice changes to the formatting of the plots
for a in [twin_axes]:
# a[i].set_xlim(-1,4)
a[i].set_ylim(0,1.1)
plt.show()
我主要建议不要尝试使用pd.crosstab
做所有事情,而是建议对独特区域进行一些快速简便的循环,以获取所需的df结构。
每个与组相关的数据框现在看起来都像您想要的:
type count perc
Result 0 1
Group
-2 1 0 0.0
-1 0 1 1.0
0 1 0 0.0
1 0 1 1.0
type count perc
Result 0 1
Group
-1 1 2 0.666667
1 0 1 1.000000
type count perc
Result 0 1
Group
-1 0 1 1.0
0 1 1 0.5
1 0 1 1.0
type count perc
Result 0 1
Group
0 1 1 0.5
1 0 2 1.0
现在情节看起来像这样: