如何从 pandas 数据透视表中获取计算得出的列名

时间:2017-05-09 00:16:52

标签: python pandas

我正在尝试从pandas.pivot_table创建一个excel电子表格。

我不想使用to_excel:

  • 以一半的速度渲染
  • 不允许设置单元格格式
  • 不计算单元格宽度
  • 不允许我使用公式
  • 创建其他单元格

  • 从列表列表中创建数据框
  • 使用pivot_table转换为数据透视表
  • 使用to_records转换为记录

现在我可以创建一个工作表,但是我还需要列标题。索引的列标题我可以在创建数据透视表时指定,但是如何获取由各个不同取值推导出来的列名(column_names)?

import pandas
import datetime
import logging
import math
import random
from dateutil.relativedelta import relativedelta
import xlsxwriter

logging.basicConfig(level=logging.INFO)


def get_sales_data(*, num_records, num_custs, num_products, date_from, number_of_months):
    """Generate random shipment rows for a synthetic sales data set.

    One candidate shipment is drawn for every (customer, month, product)
    combination; combinations whose random quantity comes out as 0 are
    dropped.

    Args:
        num_records: Accepted for interface compatibility but not used; the
            number of rows is determined by the other arguments.
        num_custs: Number of customers, named "cust 000", "cust 001", ...
        num_products: Number of products, named "product 0", "product 1", ...
        date_from: First ship date; each later month is computed as
            ``date_from + relativedelta(months=offset)``.
        number_of_months: Number of consecutive months to generate.

    Returns:
        A list of ``(cust_name, product, ship_date, qty)`` tuples, each with
        ``1 <= qty <= 20``.
    """
    print("number of months %s" % number_of_months)
    rows = []
    for cust_nbr in range(num_custs):
        cust_name = "cust " + "{0:0>3}".format(cust_nbr)
        for month_delta in range(number_of_months):
            ship_date = date_from + relativedelta(months=month_delta)
            # Must be range(num_products): iterating the tuple
            # (0, num_products) would only ever visit two product numbers.
            for product_nbr in range(num_products):
                qty = random.randint(0, 20)
                if qty > 0:
                    # Every (customer, product, month) is visited exactly
                    # once, so no de-duplication step is needed.
                    rows.append((cust_name, "product " + str(product_nbr), ship_date, qty))
    return rows

def to_excel(workbook, sheetname, dataframe, headers=None):
    """Write rows of data to a new worksheet and size each column to fit.

    Data rows start at worksheet row 1; row 0 is reserved for the column
    headers.

    Args:
        workbook: Object providing ``add_worksheet(name)``. The worksheet it
            returns must provide ``write(row, col, value)`` and
            ``set_column(first_col, last_col, width)``.
        sheetname: Name of the worksheet to create.
        dataframe: Iterable of rows, each an iterable of cell values. ``None``
            values are skipped entirely; NaN floats leave the cell blank.
        headers: Optional sequence of column titles written to row 0. When
            omitted, ``dataframe.dtype.names`` is used if the object exposes
            it (NumPy record arrays keep their field names there); otherwise
            row 0 is left empty.
    """
    worksheet = workbook.add_worksheet(sheetname)

    if headers is None:
        headers = getattr(getattr(dataframe, "dtype", None), "names", None) or ()

    # Widest text seen per column index. A dict (rather than a list sized
    # from the first row) copes with empty input and rows of uneven length.
    max_widths = {}

    for col_index, title in enumerate(headers):
        if title is None:
            continue
        worksheet.write(0, col_index, title)
        max_widths[col_index] = len(str(title))

    for row_index, row in enumerate(dataframe, start=1):
        for col_index, datum in enumerate(row):
            if datum is None:
                continue
            # NaN marks a missing value in the pivot; leave the cell blank.
            if not (isinstance(datum, float) and math.isnan(datum)):
                worksheet.write(row_index, col_index, datum)
            max_widths[col_index] = max(max_widths.get(col_index, 0), len(str(datum)))

    # Apply the widths once, after all rows are known. Columns that never
    # held a value have no entry and keep the default width.
    for col_index, width in sorted(max_widths.items()):
        worksheet.set_column(col_index, col_index, width + 1)


# Build the raw shipment rows (a list of tuples).
from_date = datetime.date(2015, 1, 1)
to_date = datetime.date(2017, 7, 1)  # not referenced below
matrix = get_sales_data(num_records=3000, num_products=3, date_from=from_date, number_of_months=30, num_custs=1000)

# Load the rows into a DataFrame with named columns.

labels = ["cust_name", "product", "ship_date", "qty"]
dataframe = pandas.DataFrame.from_records(matrix, columns=labels)
print(dataframe)

# Pivot: one row per (customer, product), one column per ship date.

pivot_table = pandas.pivot_table(dataframe, columns='ship_date', values='qty', index=['cust_name', 'product'])
print(pivot_table)

# Flatten the pivot back into plain records.
# NOTE(review): per the pandas docs, to_records() returns a NumPy record
# array, so the column titles should be available as records.dtype.names.

records = pivot_table.to_records()

# Write the workbook. The extension is ".xlsx" (the previous ".xslx" was a
# typo), and the context manager closes the file handle once the workbook
# has been flushed into it.

with open("/tmp/crosstab.xlsx", "wb") as output:
    workbook = xlsxwriter.Workbook(output)
    to_excel(workbook, "Cust Product By Month", records)
    workbook.close()

0 个答案:

没有答案