我想用python从多个“是”或“否”的数据中绘制多个x类别的条形图。我已经开始编写一些代码,但是我认为我正在以缓慢的方式获得所需的解决方案。对于使用seaborn,Matplotlib或pandas但不使用Bokeh的解决方案,我会很好,因为我想按比例制作出版物质量的数字。
我最终想要的是:
这是我正在使用的数据集:
import pandas as pd
data = [{'ship': 'Yes','canoe': 'Yes', 'cruise': 'Yes', 'kayak': 'No','color': 'Red'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'Yes','canoe': 'No','color': 'Green'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'No','canoe': 'No','color': 'Green'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'No','canoe': 'No','color': 'Red'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'Yes','canoe': 'No','color': 'Red'},{'ship': 'No', 'cruise': 'Yes', 'kayak': 'No','canoe': 'Yes','color': 'Green'},{'ship': 'No', 'cruise': 'No', 'kayak': 'No','canoe': 'No','color': 'Green'},{'ship': 'No', 'cruise': 'No', 'kayak': 'No','canoe': 'No','color': 'Red'}]
df = pd.DataFrame(data)
这就是我的开始:
print(df['color'].value_counts())
red = 4 # there must be a better way to code this rather than manually. Perhaps using len()?
green = 4
# get count per type
ca = df['canoe'].value_counts()
cr = df['cruise'].value_counts()
ka = df['kayak'].value_counts()
sh = df['ship'].value_counts()
print(ca, cr, ka, sh)
# group by color
cac = df.groupby(['canoe','color'])
crc = df.groupby(['cruise','color'])
kac = df.groupby(['kayak','color'])
shc = df.groupby(['ship','color'])
# make plots
cac2 = cac['color'].value_counts().unstack()
cac2.plot(kind='bar', title = 'Canoe by color')
但是我真正想要的是所有x类别都放在一个图上,只显示“是”响应的结果,并取作“是”的比例,而不是仅仅计算在内。帮助吗?
答案 0 :(得分:2)
不确定我是否正确理解了这个问题。看起来每种船型和颜色的答案所占的比例似乎更有意义。
import matplotlib.pyplot as plt
import pandas as pd
data = [{'ship': 'Yes','canoe': 'Yes', 'cruise': 'Yes', 'kayak': 'No','color': 'Red'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'Yes','canoe': 'No','color': 'Green'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'No','canoe': 'No','color': 'Green'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'No','canoe': 'No','color': 'Red'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'Yes','canoe': 'No','color': 'Red'},{'ship': 'No', 'cruise': 'Yes', 'kayak': 'No','canoe': 'Yes','color': 'Green'},{'ship': 'No', 'cruise': 'No', 'kayak': 'No','canoe': 'No','color': 'Green'},{'ship': 'No', 'cruise': 'No', 'kayak': 'No','canoe': 'No','color': 'Red'}]
df = pd.DataFrame(data)
ax = df.replace(["Yes","No"],[1,0]).groupby("color").mean().transpose().plot.bar(color=["g","r"])
ax.set_title('Proportion "Yes" answers per of boat type and color')
plt.show()
例如所有绿色独木舟中有25%回答“是”。
答案 1 :(得分:1)
尝试一下。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import groupby
data = [{'ship': 'Yes','canoe': 'Yes', 'cruise': 'Yes', 'kayak': 'No','color': 'Red'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'Yes','canoe': 'No','color': 'Green'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'No','canoe': 'No','color': 'Green'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'No','canoe': 'No','color': 'Red'},{'ship': 'Yes', 'cruise': 'Yes', 'kayak': 'Yes','canoe': 'No','color': 'Red'},{'ship': 'No', 'cruise': 'Yes', 'kayak': 'No','canoe': 'Yes','color': 'Green'},{'ship': 'No', 'cruise': 'No', 'kayak': 'No','canoe': 'No','color': 'Green'},{'ship': 'No', 'cruise': 'No', 'kayak': 'No','canoe': 'No','color': 'Red'}]
df = pd.DataFrame(data)
df1 = df.replace(["Yes","No"],[1,0]).groupby("color").mean().stack().rename('% Yes').to_frame()
def add_line(ax, xpos, ypos):
line = plt.Line2D([xpos, xpos], [ypos + .1, ypos],
transform=ax.transAxes, color='gray')
line.set_clip_on(False)
ax.add_line(line)
def label_len(my_index,level):
labels = my_index.get_level_values(level)
return [(k, sum(1 for i in g)) for k,g in groupby(labels)]
def label_group_bar_table(ax, df):
ypos = -.1
scale = 1./df.index.size
for level in range(df.index.nlevels)[::-1]:
pos = 0
for label, rpos in label_len(df.index,level):
lxpos = (pos + .5 * rpos)*scale
ax.text(lxpos, ypos, label, ha='center', transform=ax.transAxes)
add_line(ax, pos*scale, ypos)
pos += rpos
add_line(ax, pos*scale , ypos)
ypos -= .1
colorlist = ['green','red']
cp = sns.color_palette(colorlist)
ax = sns.barplot(x=df1.index, y='% Yes', hue = df1.index.get_level_values(0), data=df1, palette=cp)
#Below 2 lines remove default labels
ax.set_xticklabels('')
ax.set_xlabel('')
label_group_bar_table(ax, df1)
输出:
答案 2 :(得分:0)
不确定您是否正在寻找它,请告诉我它是否有效。
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
tidy_df = pd.melt(df, id_vars=['color'] ,var_name='variable', value_name='value')
total_df = tidy_df[['variable']].groupby('variable').size().reset_index()
tidy_df = tidy_df.groupby(['color', 'variable', 'value']).size().reset_index()
merged_df = pd.merge(tidy_df, total_df, on='variable', how='left', suffixes=('_left', '_right'))
merged_df['proportion'] = merged_df['0_left']/merged_df['0_right']
# merged_df[merged_df['value'] == 'Yes']
palette ={"Green":"green","Red":"red"} # optional you can select your own
plt.figure(figsize=(12, 6))
sns.barplot(x='variable', y='proportion', hue='color',data=merged_df[merged_df['value'] == 'Yes'], palette=palette)
plt.xticks(rotation=65)
#plt.savefig('numbers.png')
plt.show()