openpyxl - 覆盖数据表并保留数据透视表

时间:2018-01-24 15:26:56

标签: python-3.x openpyxl

我已经看到了这个问题的一些答案,但没有一个能够奏效。 例如:How to write to an existing excel file without breaking formulas with openpyxl?

Docs似乎没有任何消失: http://openpyxl.readthedocs.io/en/latest/api/openpyxl.reader.excel.html 我尝试将xls.load_workbook替换为xls.reader.excel.load_workbook,但它并没有改变任何内容。

我当前的代码会覆盖数据表中的数据,但会破坏另一个工作表中的数据透视表功能(工作表仍在那里但只有值)。知道如何保留数据透视表吗?

import pandas as pd
import openpyxl as xls
from shutil import copyfile

template_file = 'openpy_test.xlsx' 
output_file = 'openpy_output.xlsx' 

copyfile(template_file, output_file)

book = xls.load_workbook(output_file,guess_types=False,data_only=False)
writer = pd.ExcelWriter(output_file,engine='openpyxl')

writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

df.to_excel(writer,sheet_name='data',index=False,encoding='utf8')
writer.save()

我也尝试了book.save('dummycopy.xlsx'),它还使用非功能数据透视表进行保存。所以我确定问题与load_workbook函数有关。

包装版本:

openpyxl 2.4.10 py36_0

pandas 0.20.3 py36hce827b7_2

1 个答案:

答案 0 :(得分:1)

我认为openpyxl目前不支持excel数据透视表。我不得不切换到使用win32com库。

这是一个包装模块,我写的用透视表做特定的东西;它基本上是VBA翻译成python(记录宏并阅读VBA,它有意义)。希望能帮助到你。它仍然是一项正在进行的工作,但应该足以让你与之合作。

import os, datetime
import win32com.client as win32
win32c = win32.constants
import sys, datetime

letters = ' ABCDEFGHIJKLMNOPQRSTUVWXYZ' #space to compensate for index. if letter is a if column is 1

def Pull_excel_workbook(path = '', filename = '', visible = False):
    '''function to run excel on the given filename'''
    if path == '': path = os.getcwd()
    if filename == '': raise FileNotFoundError('Please supply a file')
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    excel.Visible = visible
    try: wb = excel.Workbooks.Open(path + filename)
    except: print('Try again\n{}'.format(sys.exc_info()))
    ws = wb.ActiveSheet
    data = list(ws.UsedRange.Value) #2d list of rows and columns
    src = '{}!R1C1:R{}C{}'.format(ws.Name, len(data), len(data[0]))
    return excel, wb, src

    #wb.SaveAs(path + filename)

def Create_pivottable(wb, src, table_name = 'Pivot'):
    '''creates Pivot Table object in the wb in a new Pivot worksheet'''
    ws = wb.Sheets.Add()    #should also change wb.ActiveSheet to the new one.
    ws.Name = table_name
    tname = ws.Name
    starting_point = (4,1)  #row, column
    pc = wb.PivotCaches().Add(SourceType = win32c.xlDatabase,
                              SourceData = src)
    try:
        pt = pc.CreatePivotTable(TableDestination = '{}!R{}C{}'.format(tname, starting_point[0], starting_point[1]),
                            TableName = table_name,
                            DefaultVersion = win32c.xlPivotTableVersion10 #15
                            )
    except:             #not sure if will work...
        print('{}:{}:{}:{}'.format(wb, src, table_name, '{}!R{}C{}'.format(tname, starting_point[0], starting_point[1])))
        #tabledestination format of RN Pivot!R4C1 is not correct format, should be 'RN Pivot'!R4C1
        pt = pc.CreatePivotTable(TableDestination = '{}!R{}C{}'.format(tname, starting_point[0], starting_point[1]),
                            TableName = table_name,
                            DefaultVersion = win32c.xlPivotTableVersion15
                            )
    wb.Sheets(ws.Name).Select()
    wb.Sheets(ws.Name).Cells(3,1).Select()

def Add_to_Filter(wb, tname, field_name):
    ''' '''
    field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
    field.Orientation = win32c.xlPageField
    field.Position = 1

def Add_to_Row(wb, tname, field_name, position = 1):
    ''' '''
    field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
    field.Orientation = win32c.xlRowField
    field.Position = position

def Add_to_Column(wb, tname, field_name, position = 1):
    ''' '''
    field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
    field.Orientation = win32c.xlColumnField
    field.Position = position
    if position > 1:
        text = 'maybe do something here....'
        pass

def Add_to_Value(wb, tname, field_name, alias = '', calculation = 'xlSum'):
    ''' '''
    if type(calculation) is str and calculation in win32c.__dict__['__dicts__'][0]:
        calculation = win32c.__dict__['__dicts__'][0][calculation]
    datafield = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
    wb.ActiveSheet.PivotTables(tname).AddDataField(datafield, alias, calculation)

def LtoC(letter):
    global letters
    col = letters.index(letter)
    return col

def CtoL(col):
    global letters
    letter = letters[col]
    return letter

def Format_pretty(wb, tname, row_to_colapse):
    '''makes it look prettier'''
    wb.ActiveSheet.PivotTables(tname).TableStyle2 = 'PivotStyleMedium9'
    if type(row_to_colapse) is not str:
        for row in row_to_colapse:
            wb.ActiveSheet.PivotTables(tname).PivotFields(row).ShowDetail = False #collapses
            wb.ActiveSheet.PivotTables(tname).PivotFields(row).RepeatLabels = True #repeats labels
    else:
        wb.ActiveSheet.PivotTables(tname).PivotFields(row_to_colapse).ShowDetail = False #collapses
        wb.ActiveSheet.PivotTables(tname).PivotFields(row_to_colapse).RepeatLabels = True #repeats labels
    wb.ActiveSheet.Columns('A:Z').EntireColumn.AutoFit()
    wb.ActiveSheet.Range('A1').Select()

def Add_calcd_col(ws, col, row_start, row_end, formula, style = '', col_title = 'default'):
    '''col and rows should be int
    '''
    letter = CtoL(col)
    ws.Range('{0}{1}:{0}{2}'.format(letter, row_start, row_end)).Select()
    ws.Cells(row_start, col).Value = col_title
    for row in range(row_start + 1, row_end + 1):
        ws.Cells(row, col).Value = formula.format(row)
    ws.Range('{0}{1}:{0}{2}'.format(letter, row_start, row_end)).Style = style
    #print("ws.Range('{0}1:{0}200'.format({0})).Style = style".format(letter))
    #ws.Range('{0}1:{0}200'.format(letter)).Style = style

def Values_to_columns(wb,tname, position = 2):
    ''' '''
    wb.ActiveSheet.PivotTables(tname).DataPivotField.Orientation = win32c.xlColumnField
    wb.ActiveSheet.PivotTables(tname).DataPivotField.Position = position

def WB_save(wb, path, tname, filename):
    '''clean save of the new file '''
    #Format_pretty(wb, tname, 'Division') #that needs to be fixed....
    new_filename = filename[:-5] + '-{}.xlsx'.format(datetime.date.today().strftime('%m.%d.%y'))
    wb.SaveAs(path + new_filename)

def Pivot_refresh(path, filename, pivot_sheet_name, pivot_table_name = 'Pivot'):
    '''function to refresh the pivot table
    tested and functional with recruiting prod report'''
    excel, wb, src = Pull_excel_workbook(path = path, filename = filename)
    wb.Sheets(pivot_sheet_name).Select()
    cell = 'A6' #need a better way for this
    excel.Worksheets(pivot_sheet_name).Range(cell).PivotTable.RefreshTable()
    #pvt = excel.Worksheets(pivot_sheet_name).Range(cell).PivotTable
    #pvt.RefreshTable()
    WB_save(wb, path, pivot_table_name, filename)
    #pivot refresh
    #new = filename[:-5] + '-{}.xlsx'.format(2)
    #Pivot_refresh(path = path, filename = new, pivot_sheet_name = 'Pivot')

def Hide_columns(wb, tname, start, end):
    '''Hides columns'''
    if type(start) is not str: start = CtoL(start)
    if type(end) is not str: end = CtoL(end)
    wb.ActiveSheet.Columns('{}:{}'.format(start, end)).EntireColumn.Hidden = True