python瀑布图中的错误注释

时间:2019-04-24 23:26:30

标签: python annotate waterfall

我修改了一个瀑布式python代码,但给出了错误/缺少注释的信息。我怀疑传递给该函数的plt.annotate和格式有问题,但经过多次尝试后仍找不到确切的代码错误。我的代码如下:

> dput(head(data))
structure(list(attribute = c("Acceleration/speed", "Dribbling", 
"Aggression", "Ball Control", "Composure", "Crossing"), weights = c(0.174119699170201, 
0.0142504648411431, 0, 0.0821275086888316, 0, 0.079118689094399
), values = list(82.5, 88, 84, 92, 86, 90)), .Names = c("attribute", 
"weights", "values"), row.names = c(NA, 6L), class = c("player", 
"data.frame"))

当我运行下面的示例时,它显示为顶部图像,但是应该显示类似底部的图像。

class Waterfall_plot(object):
    def __init__(self, category, data, config=None):
        self.category = category
        self.data = data
        self.set_config()
        if config != None:
            self.config.update(config)

    def set_config(self):
        self.config = {
                "figsize" : (10,10),
                "x_lab" : "",
                "y_lab" : "Revenue",
                "formatting" : "{:,.0f}",
                "rotation_value" : 90,
                "sorted_value" : False,
                "threshold" : None,
                "net_label" : "Net",
                "other_label" : "Other",
                "Title" : "Revenue waterfall",
                "blue_color" : "blue",
                "green_color" : "#95FF24", 
                "red_color" : "r",
                "blank_color" : (0,0,0,0)
                }

    def run(self):

        #convert category and data to np.array
        category=np.array(self.category)
        data=np.array(self.data)

        #If sort by loss
        if self.config['sorted_value']: 
            abs_data = abs(data)
            data_order = np.argsort(abs_data)[::-1]
            data = data[data_order]
            category = category[data_order]

        #If group by threshold, group small loss less than the threshold into 'other' 
        if self.config['threshold']:
            abs_data = abs(data)
            threshold_v = abs_data.max()*self.config['threshold']
            if threshold_v > abs_data.min():
                category = np.append(category[abs_data>=threshold_v],self.config['other_label'])
                data = np.append(data[abs_data>=threshold_v],sum(data[abs_data<threshold_v]))

        changes = {'amount' : data}

        fig, ax = plt.subplots(figsize = self.config['figsize'])
        ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))

        #Store data and create a blank series to use for the waterfall
        trans = pd.DataFrame(changes,category)
        blank = trans.amount.cumsum().shift(1).fillna(0)

        trans['positive'] = trans['amount'] > 0

        #Get the net total number for the final element in the waterfall
        total = trans.sum().amount
        trans.loc[self.config['net_label']]= total
        blank.loc[self.config['net_label']] = total

        #The steps graphically show the levels as well as used for label placement
        step = blank.reset_index(drop=True).repeat(3).shift(-1)
        step[1::3] = np.nan

        #When plotting the last element, we want to show the full bar,
        #Set the blank to 0
        blank.loc[self.config['net_label']] = 0

        #define bar colors for net bar
        trans.loc[trans['positive'] > 1, 'positive'] = 99
        trans.loc[trans['positive'] < 0, 'positive'] = 99
        trans.loc[(trans['positive'] > 0) & (trans['positive'] < 1), 'positive'] = 99

        trans['color'] = trans['positive']

        trans.loc[trans['positive'] == 1, 'color'] = self.config['green_color']
        trans.loc[trans['positive'] == 0, 'color'] = self.config['red_color']
        trans.loc[trans['positive'] == 99, 'color'] = self.config['blue_color']

        my_colors = list(trans.color)

        #Plot and label
        my_plot = plt.bar(range(0,len(trans.index)), blank, width = 0.5, color = self.config['blank_color'])
        plt.bar(range(0,len(trans.index)), trans.amount, width=0.6,
             bottom=blank, color=my_colors)       


        #axis labels
        plt.xlabel("\n" + self.config['x_lab'])
        plt.ylabel(self.config['y_lab'] + "\n")

        #Get the y-axis position for the labels
        y_height = trans.amount.cumsum().shift(1).fillna(0)

        temp = list(trans.amount)

        # create dynamic chart range
        for i in range(len(temp)):
            if (i > 0) & (i < (len(temp) - 1)):
                temp[i] = temp[i] + temp[i-1]

        trans['temp'] = temp

        plot_max = trans['temp'].max()
        plot_min = trans['temp'].min()

        #Make sure the plot doesn't accidentally focus only on the changes in the data
        if all(i >= 0 for i in temp):
            plot_min = 0
            if all(i < 0 for i in temp):
                plot_max = 0

        if abs(plot_max) >= abs(plot_min):
            maxmax = abs(plot_max)   
        else:
            maxmax = abs(plot_min)

        pos_offset = maxmax / 40

        plot_offset = maxmax / 15 
        print(pos_offset,plot_offset)

        #Start label loop
        loop = 0
        for index, row in trans.iterrows():
            if row['amount'] == total:
                y = y_height[loop]
            else:
                y = y_height[loop] + row['amount']
                    # Determine if we want a neg or pos offset
            if row['amount'] > 0:
                y += (pos_offset*2)
                plt.annotate(self.config['formatting'].format(row['amount']),(loop,y),ha="center", color = 'g', fontsize=9)
            else:
                y -= (pos_offset*4)
                plt.annotate(self.config['formatting'].format(row['amount']),(loop,y),ha="center", color = 'r', fontsize=9)
        loop+=1

        #Scale up the y axis so there is room for the labels
        plt.ylim(0,plot_max+round(3.6*plot_offset, 7))

        #Rotate the labels
        plt.xticks(range(0,len(trans)), trans.index, rotation = self.config['rotation_value'])

        #add zero line and title
        plt.axhline(0, color='black', linewidth = 0.6, linestyle="dashed")
        plt.title(self.config['Title'])
        plt.tight_layout()

        return fig, ax

wronga busy cat

1 个答案:

答案 0 :(得分:0)

您需要制表loop+=1,因为它不在for循环中。顺便说一下图。

        loop = 0
        for index, row in trans.iterrows():
            if row['amount'] == total:
                y = y_height[loop]
            else:
                y = y_height[loop] + row['amount']
                    # Determine if we want a neg or pos offset
            if row['amount'] > 0:
                y += (pos_offset*2)
                plt.annotate(self.config['formatting'].format(row['amount']),(loop,y),ha="center", color = 'g', fontsize=9)
            else:
                y -= (pos_offset*4)
                plt.annotate(self.config['formatting'].format(row['amount']),(loop,y),ha="center", color = 'r', fontsize=9)
            loop+=1