如何用Python轻松生成包含matplotlib图形和pandas表格的报告

时间:2015-02-05 08:11:28

标签: python matplotlib markdown mpld3

我有一段计算性能报告的代码(报告包含文本、pandas表格和matplotlib图形)。我想把这份报告导出为文件(html / pdf等)。

我尝试过使用pweave,但无法让它正常工作(网站上的示例可以运行,但当我在自己的代码中使用它时——我的代码包含类和函数——它似乎无法正常工作)。此外,pweave似乎不支持pandas表格。

我对markdown软件包不熟悉,但它与mpld3配合使用可能是解决问题的关键。有人可以帮我举个例子吗?

谢谢, 哈南。

2 个答案:

答案 0 :(得分:0)

对你的问题的回答很晚:

即使使用您自己的类,Pweave也能很好地工作。只需确保类文件所在的目录在Python的导入路径中。一种方法是在python代码块中添加该目录,如下所示:

```python, echo=False
# Extend the import path so modules in your own directory can be imported
# from inside this Pweave chunk.
import sys
sys.path.append('/path/to/your/python/files/')
# Once the directory is on sys.path, the user-defined module is imported as usual.
import myfile
myfile.myclass...
```

Pweave可以输出Markdown格式(选项-f pandoc),然后您可以使用pandoc将其转换为HTML,或通过LaTeX转换为PDF。

关于pandas表格:在python代码块中通过tabulate包将它们转换为markdown。

答案 1 :(得分:-1)

来自http://buddapie.com/2015/10/31/report-with-matplotlib-tables-and-plots/

生成整个报告的代码如下。对于表格,我写了一个算法,将长列拆分成多个较短的列。我使用了Seaborn库,因为我非常喜欢它的格式样式,而且它非常容易设置。

import locale

import matplotlib.pyplot as plt
import matplotlib.table as tbl
import numpy as np
import seaborn as sns

def frange(x, y, jump):
    """Yield numbers from ``x`` up to and including ``y`` in steps of ``jump``.

    A float-friendly counterpart of ``range`` whose upper bound is inclusive.
    Values are produced by repeatedly adding ``jump`` to the running value,
    so rounding error can accumulate for steps that are not exactly
    representable in binary floating point.

    Args:
        x: first value to yield.
        y: inclusive upper bound.
        jump: step between consecutive values; must be strictly positive.

    Yields:
        ``x``, ``x + jump``, ``x + 2 * jump``, ... while the value is <= ``y``.

    Raises:
        ValueError: if ``jump`` is not strictly positive. Raised on the first
            iteration, because this is a generator function.
    """
    # A zero or negative step would never move past ``y``: the loop below
    # would spin forever. ``not jump > 0`` also rejects NaN.
    if not jump > 0:
        raise ValueError("jump must be strictly positive, got %r" % (jump,))
    while x <= y:
        yield x
        x += jump

def get_table(ax, ts_list, list_column_labels, list_col_widths, list_col_formats):
    """Draw ``ts_list`` as a table on ``ax``, wrapping long series side by side.

    The series are cut into chunks of at most 19 rows. Every chunk contributes
    one table column per entry of ``list_col_formats``, so a long series is
    rendered as several shorter column groups placed next to each other.

    Args:
        ax: matplotlib axes the table is drawn on.
        ts_list: indexable collection of equally long series, one per logical
            column. The row count is taken from ``ts_list[1]``.
        list_column_labels: header text per logical column.
        list_col_widths: column width per logical column (axes fraction).
        list_col_formats: printf-style format per logical column; its length
            determines how many logical columns are rendered.

    Returns:
        The table created by ``ax.table``. It is already attached to ``ax``.

    Raises:
        ValueError: if the series are empty.
    """
    divisor = len(list_col_formats)
    n_rows = len(ts_list[1])
    if n_rows == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("cannot build a table from empty time series")

    # Never make the table taller than the data itself.
    max_length = min(19, n_rows)

    # Ceiling division: a length that is an exact multiple of max_length must
    # not produce an extra, completely empty group of columns.
    n_chunks = -(-n_rows // max_length)
    n_table_cols = divisor * n_chunks

    columns = []
    for i in range(n_table_cols):
        column = list(get_section(ts_list, i, max_length, n_rows, list_col_formats, divisor))
        # The last chunk may be shorter; pad with blank cells so every column
        # has the same height without showing fake "0.0" data.
        column.extend([''] * (max_length - len(column)))
        columns.append(column)

    # Widths and labels repeat once per chunk.
    table_widths = [list_col_widths[i % divisor] for i in range(n_table_cols)]
    table_labels = [list_column_labels[i % divisor] for i in range(n_table_cols)]

    # ax.table expects row-major text, so transpose the column lists.
    cell_text = [list(row) for row in zip(*columns)]

    the_table = ax.table(cellText=cell_text, colWidths=table_widths, colLabels=table_labels, loc='center')
    the_table.auto_set_font_size(False)
    the_table.set_fontsize(6)

    # get_celld() is the supported way to reach the cells; the
    # 'child_artists' property key is not available in current matplotlib.
    for cell in the_table.get_celld().values():
        cell.set_height(0.05)

    return the_table

def format(j, format):
    """Format the number ``j`` with a printf-style spec, honouring the locale.

    Note that this function shadows the builtin ``format`` and that its second
    parameter shadows the function itself; both names are kept so existing
    callers continue to work.

    Args:
        j: number to format.
        format: printf-style format specifier, e.g. ``'%.2f'``.

    Returns:
        The formatted string, with grouping applied according to the current
        ``LC_NUMERIC`` locale settings.
    """
    # locale.format() was deprecated in Python 3.7 and removed in 3.12;
    # format_string() is the supported equivalent.
    return locale.format_string(format, j, grouping=True)

def get_section(list_ts, i, max_length, l, col_formats, d):
    """Return the formatted cells of table column ``i``.

    Table columns are numbered chunk by chunk: column ``i`` shows logical
    column ``i % d`` restricted to chunk ``i // d``, where a chunk is a run
    of ``max_length`` consecutive rows (the final one is clipped at ``l``).
    """
    chunk_index, series_index = divmod(i, d)
    start = chunk_index * max_length
    stop = min(start + max_length, l)

    formatted = []
    for value in list_ts[series_index][start:stop]:
        formatted.append(format(value, col_formats[series_index]))
    return formatted

def write_report(list_plots, filename='TestReport'):
    """Render up to three simulations as line plots with matching tables.

    The figure is a 3x2 grid: each row shows the line plot of one simulation
    on the left and a table of its values on the right.

    Args:
        list_plots: sequence of at most three simulations. Each entry is
            either a single series or a pair of equally long series; for a
            pair the second series is drawn on a twin y-axis. Only the first
            slot has labels and formats configured for a second series.
        filename: path handed to ``savefig``. Defaults to ``'TestReport'``,
            the name that used to be hard-coded.

    Raises:
        ValueError: if more simulations are passed than the grid has rows.
    """
    # where we position the plots and the tables inside the flattened grid
    axes_plots = [0, 2, 4]
    axes_tables = [1, 3, 5]

    # Fail early, before a figure is created, instead of with a bare
    # IndexError halfway through plotting.
    if len(list_plots) > len(axes_plots):
        raise ValueError(
            "write_report supports at most %d simulations, got %d"
            % (len(axes_plots), len(list_plots))
        )

    grid_shape = (3, 2)
    fig, axes = plt.subplots(nrows=grid_shape[0], ncols=grid_shape[1], figsize=(8, 10))

    # per-simulation axis labels and titles
    list_labels = [['periods', 'cash flows (m)', 'balance (m)'], ['periods', 'cashflows'], ['periods', 'cash flows']]
    list_titles = ['Simulation 1', 'Simulation 2', 'Simulation 3']

    # line graphs
    fontsize = 5
    labelsize = 4
    for i, series in enumerate(list_plots):
        # A 2-D entry holds two series: primary and secondary axis.
        has_second = np.ndim(series) == 2
        first_ts = series[0] if has_second else series

        ax1 = axes.flat[axes_plots[i]]
        ax1.plot(first_ts, c='blue', linewidth=1)
        ax1.set_xlabel(list_labels[i][0], fontsize=fontsize)
        ax1.set_ylabel(list_labels[i][1], fontsize=fontsize)
        ax1.yaxis.label.set_color('blue')
        ax1.tick_params(axis='both', which='major', labelsize=labelsize)
        ax1.set_title(list_titles[i], fontsize=7)

        # if the other axis is relevant, builds it
        if has_second:
            ax2 = ax1.twinx()
            ax2.plot(series[1], c='red', linewidth=1)
            ax2.set_ylabel(list_labels[i][2], fontsize=fontsize)
            ax2.yaxis.label.set_color('red')
            ax2.tick_params(axis='both', which='major', labelsize=labelsize)

    col_widths = [[0.1, 0.095, 0.15], [0.125, 0.11, 0.15], [0.125, 0.13]]
    col_formats = [['%.d', '%.2f', '%.2f'], ['%.d', '%.2f'], ['%.d', '%.2f']]
    col_labels = [['period', 'cf', 'balance'], ['period (y)', 'cf'], ['period (y)', 'cf']]

    # asset simulation tables
    for i, series in enumerate(list_plots):
        ax_table = axes.flat[axes_tables[i]]
        # hides the axis etc
        ax_table.set_frame_on(False)
        ax_table.get_xaxis().set_visible(False)
        ax_table.get_yaxis().set_visible(False)

        if np.ndim(series) == 2:
            rows = [list(range(len(series[0]))), series[0], series[1]]
        else:
            rows = [list(range(len(series))), series]
        # Identity check: ``!= None`` on a numpy array compares elementwise
        # and makes the ``if`` raise "truth value is ambiguous".
        t_array = np.array([row for row in rows if row is not None])

        # get_table draws through ax.table, which already attaches the table
        # to the axes, so no extra add_table call is needed.
        get_table(ax_table, t_array, col_labels[i], col_widths[i], col_formats[i])
        ax_table.set_title(list_titles[i], fontsize=7)

    fig.tight_layout()
    fig.savefig(filename, dpi=400)

if __name__ == '__main__':

    # Demo data: a two-series simulation (period index plus a decaying
    # curve), a plain integer ramp, and a cubic curve sampled every 3.5.
    decay = np.array([np.power(100, -i*0.01) for i in range(50)])
    ts1 = [np.arange(50), decay]
    ts2 = np.arange(25)
    ts3 = [np.power(step, 3) for step in frange(3, 200, 3.5)]

    simulations = [ts1, ts2, ts3]
    write_report(simulations)