合并两个XML并保留base-xml中的attribute-value

时间:2013-11-12 14:49:25

标签: shell merge

我有两个XML文件,如下所示。如何用脚本将这两个文件合并,使结果采用new.xml中的值,同时保留仅存在于base.xml中的属性值?

base.xml

 <element name="ind"
          dbs="name1, name2, name4"
          server="ServerName"
          good-attribute="234"/>  

我的 new.xml 如下所示:

<element name="ind"
         description="My desc"
         dbId="someId"
         moreAttr="someVal"
         dbs="name1, name2, name4, name12, name3"
         server="ServerName" />

我希望把new.xml中dbs的最新值提取到merge.xml,并保留仅出现在base.xml中的good-attribute值:

merge.xml

<element name="ind"
         description="My desc"
         dbId="someId"
         moreAttr="someVal"
         dbs="name1, name2, name4, name12, name3"
         server="ServerName"
         good-attribute="234" />

使用paste可以按字段并排显示,vimdiff可以显示差异,但不允许进行选择。是否存在内置的支持,还是需要使用sed + awk进行替换?

1 个答案:

答案 0 :(得分:0)

这是Python中的直接实现:

#!/usr/bin/env python
# Python 2.5+ compatible
import sys
import xml.etree.ElementTree as etree

def main():
    """Merge the XML files named on the command line and print the result.

    Every command-line argument is parsed with xml2dict(); the parsed
    mappings are passed positionally to merge_dicts() and the merged
    result is serialized to standard output by dict2xml().
    """
    # Use the underlying binary stream when sys.stdout provides one
    # (the ``buffer`` attribute); otherwise write to sys.stdout itself.
    sink = getattr(sys.stdout, 'buffer', sys.stdout)
    parsed = map(xml2dict, sys.argv[1:])
    merged = merge_dicts(*parsed)
    dict2xml(merged, sink)

def xml2dict(source):
    """Map the ``name`` attribute of every <element> to its attributes.

    source -- filename or file object, passed straight to etree.parse().

    Returns a dict of the form {name: attribute-dict}. When several
    <element> nodes share a name, the one that comes last in document
    order wins. An <element> without a ``name`` attribute is stored
    under the key None.
    """
    tree = etree.parse(source)
    # ElementTree.getiterator() was removed in Python 3.9.  Its replacement,
    # iter(), is missing from the oldest interpreters this script targets,
    # so use iter() when present and fall back to getiterator() otherwise.
    iterate = getattr(tree, 'iter', None) or tree.getiterator
    return dict((el.get('name'), el.attrib) for el in iterate('element'))

def merge_dicts(base, new):
    """Combine per-element attribute dicts, preferring values from *new*.

    base -- mapping of element name -> attribute dict (older values)
    new  -- mapping of element name -> attribute dict (newer values)

    Returns a new dict keyed by the element names found in *new*.  Each
    value holds the attributes of that element from *base* (if any),
    overridden by the attributes from *new*.  Names that occur only in
    *base* are not part of the result.  Neither argument is modified.
    """
    merged = {}
    # for each element from new xml
    for name, attr in new.items():
        # Start from a copy of the base attributes: updating base[name]
        # in place would silently rewrite the caller's *base* mapping.
        d = merged[name] = dict(base.get(name, {}))
        # combine attributes from base and new xmls preferring new values
        d.update(attr)
    return merged

def dict2xml(d, sink):
    """Serialize attribute dicts as <element> children of one <root> node.

    d    -- mapping whose values are attribute dicts; one <element> is
            emitted per value and the keys themselves are not written.
    sink -- destination handed to ElementTree.write(); the document is
            written with encoding 'utf-8'.
    """
    root = etree.Element('root')
    for attributes in d.values():
        etree.SubElement(root, 'element', attributes)
    tree = etree.ElementTree(root)
    tree.write(sink, encoding='utf-8')

# Run the merge only when this file is executed as a script, so that
# merely importing it does not read sys.argv or write to stdout.
if __name__ == '__main__':
    main()

将此代码保存到merge-xml文件并运行chmod +x merge-xml。然后:

$ ./merge-xml base.xml new.xml >merge.xml

这是Python 2.4+兼容版本:

#!/usr/bin/env python
import sys
from xml.dom import minidom

def main():
    """Merge the XML files named on the command line and print the result.

    Every command-line argument is parsed with xml2dict(); the parsed
    mappings are passed positionally to merge_dicts() and the merged
    result is serialized to standard output by dict2xml().
    """
    # Use the underlying binary stream when sys.stdout provides one
    # (the ``buffer`` attribute); otherwise write to sys.stdout itself.
    sink = getattr(sys.stdout, 'buffer', sys.stdout)
    parsed = map(xml2dict, sys.argv[1:])
    merged = merge_dicts(*parsed)
    dict2xml(merged, sink)

def xml2dict(source):
    """Map the ``name`` attribute of every <element> to its attributes.

    source -- filename or file object, passed straight to minidom.parse().

    Returns a dict of the form {name: attribute-dict}, where each
    attribute dict is built by attr2dict().  When several <element>
    nodes share a name, the one processed last wins.
    """
    by_name = {}
    for node in minidom.parse(source).getElementsByTagName('element'):
        by_name[node.getAttribute('name')] = attr2dict(node.attributes)
    return by_name

def attr2dict(nodemap):
    """Convert a DOM attribute node map into a plain {name: value} dict.

    nodemap -- object exposing ``length`` and ``item(index)``, such as
               the ``attributes`` of a minidom element.
    """
    nodes = (nodemap.item(index) for index in range(nodemap.length))
    return dict((node.name, node.value) for node in nodes)

def merge_dicts(base, new):
    """Combine per-element attribute dicts, preferring values from *new*.

    base -- mapping of element name -> attribute dict (older values)
    new  -- mapping of element name -> attribute dict (newer values)

    Returns a new dict keyed by the element names found in *new*.  Each
    value holds the attributes of that element from *base* (if any),
    overridden by the attributes from *new*.  Names that occur only in
    *base* are not part of the result.  Neither argument is modified.
    """
    merged = {}
    # for each element from new xml
    for name, attr in new.items():
        # Start from a copy of the base attributes: updating base[name]
        # in place would silently rewrite the caller's *base* mapping.
        d = merged[name] = dict(base.get(name, {}))
        # combine attributes from base and new xmls preferring new values
        d.update(attr)
    return merged

def dict2xml(d, sink):
    """Serialize attribute dicts as <element> children of one <root> node.

    d    -- mapping whose values are attribute dicts; one <element> is
            emitted per value and the keys themselves are not written.
    sink -- object with a write() method; it receives the result of
            toprettyxml(encoding='utf-8') in a single call.
    """
    doc = minidom.getDOMImplementation().createDocument(None, "root", None)
    for attributes in d.values():
        node = doc.createElement('element')
        for key, value in attributes.items():
            node.setAttribute(key, value)
        doc.documentElement.appendChild(node)
    sink.write(doc.toprettyxml(encoding='utf-8'))

# Run the merge only when this file is executed as a script, so that
# merely importing it does not read sys.argv or write to stdout.
if __name__ == '__main__':
    main()