如何防止使用 seaborn 绘制的条形图中各条形相互重叠?

时间:2016-12-21 10:14:20

标签: python pandas matplotlib seaborn

month   cate_cnt1_members   cate_cnt2_members   cate_cnt3_members   cate_cnt4_members   cate_cnt5_members   cate_cnt6_members   cate_cnt7_members   cate_cnt8_members   cate_cnt9_members   cate_cnt10_members  cate_cnt11_members  cate_cnt12_members  cate_cnt13_members  cate_cnt14_members
201501  93.525692   5.989799    0.455098    0.027863    0.001548    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201502  90.515995   8.396707    0.971026    0.107892    0.008380    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201503  82.525162   14.066414   2.836065    0.505229    0.061750    0.005380    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201504  75.546295   18.279664   4.884050    1.102780    0.172282    0.013621    0.001199    0.000109    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201505  71.142107   20.954861   6.278794    1.401423    0.206386    0.015837    0.000593    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201506  63.783161   23.386509   9.241094    2.914457    0.601408    0.067921    0.005178    0.000273    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201507  62.361179   23.364693   9.888232    3.445630    0.812055    0.116408    0.010563    0.001240    0.000000    0.000000    0.000000    0.000000    0.000000    0.0

上面是一个 pandas 数据框(DataFrame),显示了各个类别所占的百分比,每个月各不相同。我想使用 seaborn 绘制一个条形图,其中每个条形包含从第 2 列到最后一列共 14 个类别成员的百分比。这是我的代码:

# Draw every category column as its own bar series on one shared axes.
# NOTE: each series is drawn from the zero baseline, so the bars are painted
# on top of one another instead of being stacked.
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
set2 = sns.color_palette("Set2", 10)
paired = sns.color_palette("Paired")
# One colour per category, in column order: ten from Set2, three from Paired
# and the first flatui colour for the last category.
colors = list(set2) + [paired[0], paired[1], paired[4], sns.color_palette(flatui)[0]]

f, ax = plt.subplots(figsize=(6, 15))
for i, color in enumerate(colors, start=1):
    column = 'cate_cnt{}_members'.format(i)
    sns.barplot(x=df['month'], y=df[column], label=column, color=color)
plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
# "topper middle" is not a legend location matplotlib recognises;
# "upper center" is the valid spelling of the intended position.
ax.legend(ncol=7, loc="upper center", frameon=True)
sns.despine(left=True, bottom=True)

结果如下,但我不希望这些条形相互重叠。我希望 14 个部分堆叠起来总和为 100,并完全填满 100%。我该怎么做呢? enter image description here

2 个答案:

答案 0 :(得分:3)

在 pandas 中有一种简单易行的方法。首先将月份列设置为索引,然后直接创建堆积条形图:

# Make the month column the index so that each month becomes one bar group.
df = df.set_index('month')
# Stack the remaining category columns on top of each other within each bar.
df.plot(kind='bar', stacked=True)

在 seaborn 中这样做稍微麻烦一些。您必须先计算每一行的累积和,然后再绘图:

# Set the index only if 'month' is still a regular column; calling
# set_index('month') again after it has become the index raises KeyError.
if 'month' in df.columns:
    df = df.set_index('month')
# Row-wise running total: each column now holds the sum of itself and every
# column to its left, so the last column carries the full row total.
df = df.cumsum(axis=1)

然后对原始代码稍作调整:按相反的顺序绘制,使 100% 的条形最先绘制,后面较矮的条形依次覆盖在它的上面。

# Paint the bar series from the last category down to the first, all on the
# same axes, so later (shorter) series are drawn over earlier ones.
# df is expected to be indexed by month and to hold row-wise cumulative sums.
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(10, 15))

set2_colors = sns.color_palette("Set2", 10)
paired_colors = sns.color_palette("Paired")
# Colours in drawing order: ten from Set2, three from Paired, one from flatui.
bar_colors = list(set2_colors) + [
    paired_colors[0],
    paired_colors[1],
    paired_colors[4],
    sns.color_palette(flatui)[0],
]

# Categories 14, 13, ..., 1 are paired with the colours above in that order.
for number, bar_color in zip(range(14, 0, -1), bar_colors):
    name = 'cate_cnt{}_members'.format(number)
    sns.barplot(x=df.index, y=df[name], label=name, color=bar_color)

plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
ax.legend(ncol=7, loc="upper center", frameon=True)
sns.despine(left=True, bottom=True)

enter image description here

答案 1 :(得分:1)

可以考虑先用 melt 将数据从宽格式转换为长格式,然后用数据透视表(pivot_table)的结果作为堆积条形图的数据源:

from io import StringIO
import pandas as pd
from matplotlib import rc, pyplot as plt
import seaborn

# Sample data in wide format: one row per month, one column per category.
data = """month,cate_cnt1_members,cate_cnt2_members,cate_cnt3_members,cate_cnt4_members,cate_cnt5_members,cate_cnt6_members,cate_cnt7_members,cate_cnt8_members,cate_cnt9_members,cate_cnt10_members,cate_cnt11_members,cate_cnt12_members,cate_cnt13_members,cate_cnt14_members
201501,93.525692,5.989799,0.455098,0.027863,0.001548,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201502,90.515995,8.396707,0.971026,0.107892,0.008380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201503,82.525162,14.066414,2.836065,0.505229,0.061750,0.005380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201504,75.546295,18.279664,4.884050,1.102780,0.172282,0.013621,0.001199,0.000109,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201505,71.142107,20.954861,6.278794,1.401423,0.206386,0.015837,0.000593,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201506,63.783161,23.386509,9.241094,2.914457,0.601408,0.067921,0.005178,0.000273,0.000000,0.000000,0.000000,0.000000,0.000000,0.0"""

df = pd.read_csv(StringIO(data))

# Wide -> long: one row per (month, variable) pair with its value.
dfm = pd.melt(df, id_vars="month")

seaborn.set()

# pivot_table sorts the column labels lexicographically, which would place
# cate_cnt10..cate_cnt14 before cate_cnt1 in the stack and the legend.
# Re-select the columns in their original order to keep categories 1..14.
category_order = [col for col in df.columns if col != "month"]
pivoted = dfm.pivot_table(values="value", columns="variable", index="month", aggfunc='sum')
pivoted[category_order].plot.bar(stacked=True)

# Only the tick label objects are needed; the tick positions are discarded.
_, labels = plt.xticks()
plt.legend(loc='upper center', ncol=7, frameon=True, shadow=False, prop={'size':8})
# Keep the month labels horizontal and centred under their bars.
plt.setp(labels, rotation=0, rotation_mode="anchor", ha="center")
plt.show()

Stacked Bar Graph