难以创建lxml元素子类

时间:2013-09-30 14:11:32

标签: python lxml docx python-docx

我正在尝试创建 lxml 的 Element 类的子类,但一开始就遇到了麻烦。

from lxml import etree
try:
    import docx
except ImportError:
    from docx import docx

class File(etree.ElementBase):
    """Custom lxml element used as the document root passed to docx.savedocx().

    NOTE(review): no tag name is set here, so the element is presumably
    serialised under a tag derived from the class name rather than as a
    namespaced 'document' element -- confirm against the lxml docs.
    """

    def _init(self):
        # _init() is overridden instead of __init__; the base implementation
        # is invoked first, then a 'body' child built by docx.makeelement()
        # is attached to this element.
        etree.ElementBase._init(self)
        # NOTE(review): the value stored in self.body is whatever append()
        # returns; lxml's append() is believed to return None, in which case
        # self.body is None rather than the new 'body' element -- confirm.
        self.body = self.append(docx.makeelement('body'))

# Root element that will be written out as the document.
f = File()
relationships = docx.relationshiplist()

# Core-properties metadata.
title, subject, creator = 'File', 'A very special File', 'Me'
keywords = ['python', 'Office Open XML', 'Word']
coreprops = docx.coreproperties(
    title=title,
    subject=subject,
    creator=creator,
    keywords=keywords,
)

# Remaining parts handed to savedocx(), built in the same order as before.
appprops = docx.appproperties()
contenttypes = docx.contenttypes()
websettings = docx.websettings()
wordrelationships = docx.wordrelationships(relationships)

docx.savedocx(
    f,
    coreprops,
    appprops,
    contenttypes,
    websettings,
    wordrelationships,
    'file.docx',
)

当我尝试打开这段代码输出的文档时,我的 Word(2003 版,已安装兼容包)给出了以下错误:“此文件是由以前的 Word 2007 测试版创建的,无法在此版本中打开。”当我把 File 对象替换为用 docx.newdocument() 创建的元素时,生成的文档可以正常打开。有什么想法或建议吗?

1 个答案:

答案 0 :(得分:0)

我真的不明白为什么要使用名为File的单独类。

正如 Michael0x2a 所说,你没有创建 document 标签,所以文件无法打开(我认为 Word 2007 也同样读取不了你的文件)。

但这是更正后的代码:

from lxml import etree
try:
    import docx
except ImportError:
    from docx import docx

class File(object):
    """Hold a 'document' element that contains a single 'body' child.

    Both elements are created with the default 'w' namespace prefix.  The
    root is exposed as the ``document`` attribute, which is the object to
    hand to ``docx.savedocx()``.
    """

    @staticmethod
    def makeelement(tagname, tagtext=None, nsprefix='w', attributes=None,
                    attrnsprefix=None):
        '''Create an element & return it.

        Declared as a static method so it can be called both as
        ``File.makeelement(...)`` and as ``self.makeelement(...)`` without
        the instance being swallowed as ``tagname``.

        tagname      -- local name of the element.
        tagtext      -- optional text content; skipped when falsy.
        nsprefix     -- a namespace prefix, a list of prefixes (the first
                        one names the element, all of them go into the
                        nsmap), or None/'' for no namespace.
        attributes   -- optional mapping of attribute name -> value.
        attrnsprefix -- namespace prefix for the attributes; when omitted,
                        attributes of 'w' elements reuse the element
                        namespace and all others get none.

        Raises KeyError for a prefix missing from the prefix table.
        '''
        # The prefix -> URI table lives in the docx module, not in this file.
        # NOTE(review): assumes the imported docx module exposes `nsprefixes`
        # (the legacy docx.py does) -- confirm for the installed version.
        prefixes = docx.nsprefixes

        # Deal with list of nsprefix by making namespacemap
        namespacemap = None
        if isinstance(nsprefix, list):
            namespacemap = dict(
                (prefix, prefixes[prefix]) for prefix in nsprefix)
            # FIXME: rest of code below expects a single prefix
            nsprefix = nsprefix[0]

        # Clark notation ('{uri}') or an empty string for "no namespace".
        namespace = '{' + prefixes[nsprefix] + '}' if nsprefix else ''
        newelement = etree.Element(namespace + tagname, nsmap=namespacemap)

        # Add attributes with namespaces
        if attributes:
            if attrnsprefix:
                attributenamespace = '{' + prefixes[attrnsprefix] + '}'
            elif nsprefix == 'w':
                # Quick hack: it seems every element that has a 'w' nsprefix
                # for its tag uses the same prefix for its attributes
                attributenamespace = namespace
            else:
                # No attribute namespace requested: use none at all.
                attributenamespace = ''
            for name, value in attributes.items():
                newelement.set(attributenamespace + name, value)

        if tagtext:
            newelement.text = tagtext
        return newelement

    def __init__(self):
        super(File, self).__init__()
        # Build the root first, then attach the empty body to it.
        self.document = self.makeelement('document')
        self.document.append(self.makeelement('body'))


# Wrapper object; its `document` attribute is what gets saved below.
f = File()
relationships = docx.relationshiplist()

# Core-properties metadata.
title, subject, creator = 'File', 'A very special File', 'Me'
keywords = ['python', 'Office Open XML', 'Word']
coreprops = docx.coreproperties(
    title=title,
    subject=subject,
    creator=creator,
    keywords=keywords,
)

# Remaining parts handed to savedocx(), built in the same order as before.
appprops = docx.appproperties()
contenttypes = docx.contenttypes()
websettings = docx.websettings()
wordrelationships = docx.wordrelationships(relationships)

docx.savedocx(
    f.document,
    coreprops,
    appprops,
    contenttypes,
    websettings,
    wordrelationships,
    'file.docx',
)