我有一个带有MultiIndex的pandas DataFrame:
group subgroup obs_1 obs_2
GroupA Elem1 4 0
Elem2 34 2
Elem3 0 10
GroupB Elem4 5 21
等等。如this SO question中所述,这实际上在matplotlib中是可行的,但我宁愿(如果可能的话)使用我已经知道层次结构的事实(感谢MultiIndex)。目前发生的是索引显示为元组。
这样的事情可能吗?
答案 0 :(得分:5)
如果MultiIndex
中只有两个级别,我相信以下内容会更容易:
plt.figure()
ax = plt.gca()
DF.plot(kind='bar', ax=ax)
plt.grid(True, 'both')
minor_XT = ax.get_xaxis().get_majorticklocs()
DF['XT_V'] = minor_XT
major_XT = DF.groupby(by=DF.index.get_level_values(0)).first()['XT_V'].tolist()
DF.__delitem__('XT_V')
ax.set_xticks(minor_XT, minor=True)
ax.set_xticklabels(DF.index.get_level_values(1), minor=True)
ax.tick_params(which='major', pad=15)
_ = plt.xticks(major_XT, (DF.index.get_level_values(0)).unique(), rotation=0)
还有一些涉及但更通用的解决方案(并不重要你有多少级别):
def cvt_MIdx_tcklab(df):
Midx_ar = np.array(df.index.tolist())
Blank_ar = Midx_ar.copy()
col_idx = np.arange(Midx_ar.shape[0])
for i in range(Midx_ar.shape[1]):
val,idx = np.unique(Midx_ar[:, i], return_index=True)
Blank_ar[idx, i] = val
idx=~np.in1d(col_idx, idx)
Blank_ar[idx, i]=''
return map('\n'.join, np.fliplr(Blank_ar))
plt.figure()
ax = plt.gca()
DF.plot(kind='bar', ax=ax)
ax.set_xticklabels(cvt_MIdx_tcklab(DF), rotation=0)
答案 1 :(得分:2)
我认为没有一种很好的标准方法来绘制多索引数据帧。我发现@Stein的following solution在美学上是令人愉快的。我已经将他的例子改编为您的数据:
import pandas as pd
import matplotlib.pyplot as plt
from itertools import groupby
import numpy as np
%matplotlib inline
group = ('Group_A', 'Group_B')
subgroup = ('elem1', 'elem2', 'elem3', 'elem4')
obs = ('obs_1', 'obs_2')
index = pd.MultiIndex.from_tuples([('Group_A','elem1'),('Group_A','elem2'),('Group_A','elem3'),('Group_B','elem4')],
names=['group', 'subgroup'])
values = np.array([[4,0],[43,2],[0,10],[5,21]])
df = pd.DataFrame(index=index)
df['obs_1'] = values[:,0]
df['obs_2'] = values[:,1]
def add_line(ax, xpos, ypos):
line = plt.Line2D([xpos, xpos], [ypos + .1, ypos],
transform=ax.transAxes, color='gray')
line.set_clip_on(False)
ax.add_line(line)
def label_len(my_index,level):
labels = my_index.get_level_values(level)
return [(k, sum(1 for i in g)) for k,g in groupby(labels)]
def label_group_bar_table(ax, df):
ypos = -.1
scale = 1./df.index.size
for level in range(df.index.nlevels)[::-1]:
pos = 0
for label, rpos in label_len(df.index,level):
lxpos = (pos + .5 * rpos)*scale
ax.text(lxpos, ypos, label, ha='center', transform=ax.transAxes)
add_line(ax, pos*scale, ypos)
pos += rpos
add_line(ax, pos*scale , ypos)
ypos -= .1
ax = df.plot(kind='bar',stacked=False)
#Below 2 lines remove default labels
ax.set_xticklabels('')
ax.set_xlabel('')
label_group_bar_table(ax, df)
产生:
答案 2 :(得分:0)
您可以为每个组创建一个子图并用 wspace=0
将它们粘在一起。每个子图的宽度必须根据子组的数量使用 gridspec_kw
字典中的 width_ratios
参数进行校正,以便所有列具有相同的宽度。
然后有无限的格式选择。在下面的示例中,我选择在背景中绘制水平网格线,并使用小刻度线在组之间绘制分隔线。
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.pyplot as plt # v 3.3.2
# Create sample DataFrame with MultiIndex
df = pd.DataFrame(dict(group = ['GroupA', 'GroupA', 'GroupA', 'GroupB'],
subgroup = ['Elem1', 'Elem2', 'Elem3', 'Elem4'],
obs_1 = [4, 34, 0, 5],
obs_2 = [0, 2, 10, 21]))
df.set_index(['group', 'subgroup'], inplace=True)
# Create figure with a subplot for each group with a relative width that
# is proportional to the number of subgroups
groups = df.index.levels[0]
nplots = groups.size
plots_width_ratios = [df.xs(group).index.size for group in groups]
fig, axes = plt.subplots(nrows=1, ncols=nplots, sharey=True, figsize=(6, 4),
gridspec_kw = dict(width_ratios=plots_width_ratios, wspace=0))
# Loop through array of axes to create grouped bar chart for each group
alpha = 0.3 # used for grid lines, bottom spine and separation lines between groups
for group, ax in zip(groups, axes):
# Create bar chart with horizontal grid lines and no spines except bottom one
df.xs(group).plot.bar(ax=ax, legend=None, zorder=2)
ax.grid(axis='y', zorder=1, color='black', alpha=alpha)
for spine in ['top', 'left', 'right']:
ax.spines[spine].set_visible(False)
ax.spines['bottom'].set_alpha(alpha)
# Set and place x labels for groups
ax.set_xlabel(group)
ax.xaxis.set_label_coords(x=0.5, y=-0.15)
# Format major tick labels for subgroups
ax.set_xticklabels(ax.get_xticklabels(), rotation=0, ha='center')
ax.tick_params(axis='both', which='major', length=0, pad=10)
# Set and format minor tick marks for separation lines between groups: note
# that except for the first subplot, only the right tick mark is drawn to avoid
# duplicate overlapping lines so that when an alpha different from 1 is chosen
# (like in this example) all the lines look the same
if ax.is_first_col():
ax.set_xticks([*ax.get_xlim()], minor=True)
else:
ax.set_xticks([ax.get_xlim()[1]], minor=True)
ax.tick_params(which='minor', length=45, width=0.8, color=[0, 0, 0, alpha])
# Add legend using the labels and handles from the last subplot
fig.legend(*ax.get_legend_handles_labels(), frameon=False,
bbox_to_anchor=(0.92, 0.5), loc="center left")
title = 'Grouped bar chart of a hierarchical dataset with 2 levels'
fig.suptitle(title, y=1.01, size=14);