我正在使用matplotlib和一个stackbarchart程序,有人用它来编写堆积条形图。
我的图表:
x轴有8个收入分配,每个柱子一个
y轴是每个收入分配中的人的百分比。 person type -a是第一个堆栈,person type-b是secon dstack,person type-c是第三个堆栈。
我的条形图是居中对齐的,我正在尝试找出如何将条形空间分开以使图形更好看,因此标签更易于阅读。有任何建议或澄清吗?
程序是stackedBarGraph.py,代码如下所示,其中widths是一个包含8个值的数组,每个值对应一个条形图的宽度。
如果您需要更多信息,请告诉我(我试图保持一切相关)。谢谢!
完整代码(我希望阅读起来不太难):
from __future__ import division
from pylab import *
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher
data = csv2rec('coa.csv', delimiter=',')
x = data['totalgrantaid']
y = data['studenteffort']
z = data['parentcontim']
g = data['parentincomeim']
df = pd.DataFrame(dict(grant = x, stud = y, par = z, income = g))
#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
source = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source:
source[key].append(median(df.query('income > {} and income < {}'.format(lower, upper))[key]))
#set the widths
source2 = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source2:
source2[key].append(pd.DataFrame(df.query('income > {} and income < {}'.format(lower,upper))).count()[key])
total = pd.DataFrame(df.query('income > 0 and income < 300000')['grant']).count()
total = total/10
#graph specifications
d_widths = [(source2['grant'][i]/total)[0] for i in range(8)]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
d = np.array([[source[k][i] for k in ('grant', 'stud', 'par')] for i in range(8)])
#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1,d,d_colors, edgeCols=['#000000']*3,widths = d_widths, showFirst = 8, xLabels=d_labels,scale=True)
Stackedbarchart程序:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='' # label for y axis
):
#------------------------------------------------------------------------------
# data fixeratering
# make sure this makes sense
if showFirst != -1:
showFirst = np.min([showFirst, np.shape(data)[0]])
data_copy = np.copy(data[:showFirst]).transpose().astype('float')
data_shape = np.shape(data_copy)
if heights is not None:
heights = heights[:showFirst]
if widths is not None:
widths = widths[:showFirst]
showFirst = -1
else:
data_copy = np.copy(data).transpose()
data_shape = np.shape(data_copy)
# determine the number of bars and corresponding levels from the shape of the data
num_bars = data_shape[1]
levels = data_shape[0]
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2)
# stack the data --
# replace the value in each level by the cumulative sum of all preceding levels
data_stack = np.reshape([float(i) for i in np.ravel(np.cumsum(data_copy, axis=0))], data_shape)
# scale the data is needed
if scale:
data_copy /= data_stack[levels-1]
data_stack /= data_stack[levels-1]
if heights is not None:
print "WARNING: setting scale and heights does not make sense."
heights = None
elif heights is not None:
data_copy /= data_stack[levels-1]
data_stack /= data_stack[levels-1]
for i in np.arange(num_bars):
data_copy[:,i] *= heights[i]
data_stack[:,i] *= heights[i]
#------------------------------------------------------------------------------
# ticks
if yTicks is not "none":
# it is either a set of ticks or the number of auto ticks to make
real_ticks = True
try:
k = len(yTicks[1])
except:
real_ticks = False
if not real_ticks:
yTicks = float(yTicks)
if scale:
# make the ticks line up to 100 %
y_ticks_at = np.arange(yTicks)/(yTicks-1)
y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
else:
# space the ticks along the y axis
y_ticks_at = np.arange(yTicks)/(yTicks-1)*np.max(data_stack)
y_tick_labels = np.array([str(i) for i in y_ticks_at])
yTicks=(y_ticks_at, y_tick_labels)
#------------------------------------------------------------------------------
# plot
if edgeCols is None:
edgeCols = ["none"]*len(cols)
# bars
ax.bar(x,
data_stack[0],
color=cols[0],alpha=0.7,
edgecolor=edgeCols[0],
width=widths,
linewidth=0.5,
align='center'
)
for i in np.arange(1,levels):
ax.bar(x,
data_copy[i],
bottom=data_stack[i-1],
color=cols[i],alpha=0.7,
edgecolor=edgeCols[i],
width=widths,
linewidth=0.5,
align='center'
)
# borders
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["left"].set_visible(False)
# make ticks if necessary
if yTicks is not "none":
ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
ax.yaxis.tick_left()
plt.yticks(yTicks[0], yTicks[1])
else:
plt.yticks([], [])
if xLabels is not None:
ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
ax.xaxis.tick_bottom()
plt.xticks(x, xLabels, rotation='horizontal')
else:
plt.xticks([], [])
# limits
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
# labels
if xlabel != '':
ax.xlabel(xlabel)
if ylabel != '':
ax.ylabel(ylabel)
答案 0 :(得分:0)
好的,感谢大家的意见(以及Bill向我展示了如何有效地使用列表推导)。
我能够改变程序以实现我想要的(我认为)。我添加了一个新变量,axspacing到程序的以下部分:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='', # label for y axis
xaxlim=None,
axspacing=0,
):
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)
# limits
#ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
if xaxlim is None:
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5 + num_bars * axspacing)
else:
ax.set_xlim(xaxlim)