openpyxl-如何保留xlsx自定义属性

时间:2018-10-16 08:53:37

标签: python-3.6 openpyxl xlsx

如何从我用openpyxl修改的xlsx模板中保留自定义属性?当我使用save() openpyxl工作簿时,这些自定义属性消失了!

  

自定义属性可以在这里找到:-

     

在Mac上->转到Excel中的“文件”菜单->“属性...”->“自定义”标签->   属性部分

enter image description here

1 个答案:

答案 0 :(得分:0)

我在此发布一个用于读取和写入Workbook.CustomDocumentProperties的纯Python解决方案,因为我目前也苦于openpyxl不具备此功能,而我的个人自动化项目又需要一个快速的解决办法。

实际上,如果我能完成库里所需的全部工作,我会尝试自己在openpyxl中实现此功能(以及之后的Worksheet.CustomProperties):https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1003

更新:我已经推送了我的贡献(合并请求),它应该很快就会被接受并合并 :) https://foss.heptapod.net/openpyxl/openpyxl/-/merge_requests/384

因此,现在有一种解决方法,将.xlsx转换为.zip,然后直接在zip中读取和写入.xml文件,然后最后重命名为.xlsx。

要读取Workbook.CustomDocumentProperties,您可以这样做——只需对这个很好的答案稍作改动:https://stackoverflow.com/a/46919795/9792594

from lxml import etree as ET
import zipfile    

def get_custom_doc_properties(filename):
    """Return the custom document properties stored in an .xlsx workbook.

    An .xlsx file is itself a zip archive, so the ``docProps/custom.xml``
    part is read directly from it; the workbook is never renamed or
    modified.

    Args:
        filename: Path to the .xlsx workbook.

    Returns:
        A dict mapping each custom property name to the text of its value
        element (always a string, whatever the declared value type is).
        The dict is empty when the workbook has no custom properties part.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        zipfile.BadZipFile: If ``filename`` is not a valid zip archive.
    """
    docPr_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/custom-properties}"
    workbook_props = {}

    with zipfile.ZipFile(filename) as archive:
        try:
            text = archive.read('docProps/custom.xml')
        except KeyError:
            # The part is optional: a workbook that never had custom
            # properties simply does not contain it.
            return workbook_props

    root = ET.fromstring(text)
    for child in root:
        if child.tag == f"{docPr_ns}property":
            # Each <property> wraps a single typed value element
            # (vt:lpwstr, vt:i4, vt:r8, vt:bool, vt:filetime, ...).
            for cusPr in child:
                workbook_props[child.attrib['name']] = cusPr.text
    return workbook_props

# Example call; the relative path is resolved against the current working directory.
get_custom_doc_properties(f'./example.xlsx')

向已经具有自定义doc props(因此已经具有“ docProps / custom.xml”文件)的文档中添加一个prop很简单,我们只需向xml追加一个自定义属性即可。

(但是,如果文档当前没有任何自定义doc属性,那么我们需要从头生成“docProps/custom.xml”文件,并在[Content_Types].xml中添加内容类型覆盖(Override)、在_rels/.rels中添加关系(Relationship)——请参见代码注释):

import os
from lxml import etree as ET
import zipfile
import shutil
import datetime
from tempfile import NamedTemporaryFile

def set_workbook_custom_document_properties(filename, cus_doc_prop_name, cus_doc_prop_val):
    """Add or update one custom document property in an .xlsx workbook.

    The workbook (a zip archive) is rewritten into a temporary file next to
    it and the result is copied back over the original only after the new
    archive has been written completely, so a failure part-way through
    leaves the workbook untouched.

    If the workbook has no ``docProps/custom.xml`` part yet, the part is
    created and registered in ``[Content_Types].xml`` and ``_rels/.rels``.
    If a property with the same name already exists, its value is replaced
    instead of adding a duplicate entry.

    Args:
        filename: Path to the .xlsx workbook to modify in place.
        cus_doc_prop_name: Property name; must be a ``str``.
        cus_doc_prop_val: Property value. Supported types and the value
            element they are stored as: ``bool`` (vt:bool), ``str``
            (vt:lpwstr), ``int`` (vt:i4), ``float`` (vt:r8) and
            ``datetime.datetime`` (vt:filetime; aware values are converted
            to UTC, naive values are assumed to already be UTC).

    Returns:
        None. When the name or value has an unsupported type a message is
        printed and the workbook is left unchanged.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        zipfile.BadZipFile: If ``filename`` is not a valid zip archive.
        KeyError: If the custom properties part has to be created but the
            package lacks ``[Content_Types].xml`` or ``_rels/.rels``.
    """
    if not isinstance(cus_doc_prop_name, str):
        print("you must supply a string as the 'cus_doc_prop_name'")
        return

    # bool must be tested before int: bool is a subclass of int, so testing
    # int first would store True/False as vt:i4 with the text "True"/"False".
    if isinstance(cus_doc_prop_val, bool):
        docPr_type_suffix = "bool"
        cus_doc_prop_str = "true" if cus_doc_prop_val else "false"
    elif isinstance(cus_doc_prop_val, str):
        docPr_type_suffix = "lpwstr"
        cus_doc_prop_str = cus_doc_prop_val
    elif isinstance(cus_doc_prop_val, int):
        docPr_type_suffix = "i4"
        cus_doc_prop_str = str(cus_doc_prop_val)
    elif isinstance(cus_doc_prop_val, float):
        docPr_type_suffix = "r8"
        cus_doc_prop_str = str(cus_doc_prop_val)
    elif isinstance(cus_doc_prop_val, datetime.datetime):
        docPr_type_suffix = "filetime"
        moment = cus_doc_prop_val
        if moment.utcoffset() is not None:
            # The trailing "Z" claims UTC, so convert aware values first.
            moment = moment.astimezone(datetime.timezone.utc)
        cus_doc_prop_str = moment.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        print("you must supply a string, int, float, bool, or date, as the 'cus_doc_prop_val'")
        return

    path_file = os.path.abspath(filename)
    ext = os.path.splitext(path_file)[1]

    docPr = "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
    docPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
    docPr_rel_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
    docPr_content_type = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
    content_types_ns = "http://schemas.openxmlformats.org/package/2006/content-types"
    rels_ns = "http://schemas.openxmlformats.org/package/2006/relationships"
    custom_part = 'docProps/custom.xml'
    rels_part = '_rels/.rels'
    content_types_part = '[Content_Types].xml'
    xml_declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    base_xml = '{dec}<Properties xmlns="{docPr}" xmlns:vt="{docPr_type}"></Properties>'.format(
        dec=xml_declaration, docPr=docPr, docPr_type=docPr_type).encode('utf-8')

    # Reserve a scratch file beside the workbook. It is closed right away so
    # that zipfile can reopen it by name on every platform.
    with NamedTemporaryFile(dir=os.path.dirname(path_file), suffix=ext, delete=False) as tmp_file:
        tmpname = tmp_file.name

    try:
        with zipfile.ZipFile(path_file, 'r') as zip_in:
            with zipfile.ZipFile(tmpname, 'w', zipfile.ZIP_DEFLATED) as zip_out:
                zip_out.comment = zip_in.comment  # preserve the comment
                custom_present = custom_part in zip_in.namelist()

                # Parts that are regenerated below must not be copied
                # verbatim, otherwise the archive would hold them twice.
                replaced_parts = {custom_part}
                if not custom_present:
                    replaced_parts.update((rels_part, content_types_part))
                for item in zip_in.infolist():
                    if item.filename not in replaced_parts:
                        zip_out.writestr(item, zip_in.read(item.filename))

                existing = None
                max_pid = 1  # pids 0 and 1 are reserved; the first usable pid is 2
                if custom_present:
                    custom_xml = ET.fromstring(zip_in.read(custom_part))
                    for node in custom_xml:
                        pid = node.get('pid')
                        if pid is not None:
                            max_pid = max(int(pid), max_pid)
                        if node.get('name') == cus_doc_prop_name:
                            existing = node
                else:
                    # custom.xml is not present: create it, add an Override
                    # to [Content_Types].xml and a Relationship to _rels/.rels.
                    custom_xml = ET.fromstring(base_xml)

                    content_types_xml = ET.fromstring(zip_in.read(content_types_part))
                    child_override = ET.SubElement(content_types_xml, "{%s}Override" % content_types_ns)
                    child_override.attrib['PartName'] = '/docProps/custom.xml'
                    child_override.attrib['ContentType'] = docPr_content_type
                    zip_out.writestr(content_types_part, ET.tostring(content_types_xml))

                    rels_xml = ET.fromstring(zip_in.read(rels_part))
                    # Pick the first free "rId<n>"; existing Ids need not be
                    # numeric, so they are only compared, never parsed.
                    used_ids = {node.get('Id') for node in rels_xml}
                    next_rid = 1
                    while "rId%d" % next_rid in used_ids:
                        next_rid += 1
                    child_rel = ET.SubElement(rels_xml, "{%s}Relationship" % rels_ns)
                    child_rel.attrib['Id'] = "rId%d" % next_rid
                    child_rel.attrib['Type'] = docPr_rel_type
                    child_rel.attrib['Target'] = custom_part
                    zip_out.writestr(rels_part, ET.tostring(rels_xml))

                if existing is None:
                    child = ET.SubElement(custom_xml, "{%s}property" % docPr)
                    child.attrib['name'] = cus_doc_prop_name
                    child.attrib['pid'] = str(max_pid + 1)
                    child.attrib['fmtid'] = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"
                else:
                    # Same name again: keep the entry and its pid, drop the
                    # old value so the property is updated, not duplicated.
                    child = existing
                    for stale in list(child):
                        child.remove(stale)
                val = ET.SubElement(child, "{%s}%s" % (docPr_type, docPr_type_suffix))
                val.text = cus_doc_prop_str
                zip_out.writestr(custom_part, ET.tostring(custom_xml))

        # Only reached when the new archive was written completely.
        shutil.copyfile(tmpname, path_file)
    finally:
        os.remove(tmpname)

# Example call: adds a custom property named "testDocProp7" with the float value 2.5 to ./example.xlsx.
set_workbook_custom_document_properties(f'./example.xlsx', "testDocProp7", 2.5)