我有两个 XML 文件,如下所示。如何用脚本将二者合并,使合并结果采用 new.xml 中的值,同时保留只存在于 base.xml 中的属性值?
base.xml:
<element name="ind"
dbs="name1, name2, name4"
server="ServerName"
good-attribute="234"/>
我的 new.xml 如下所示:
<element name="ind"
description="My desc"
dbId="someId"
moreAttr="someVal"
dbs="name1, name2, name4, name12, name3"
server="ServerName" />
我希望把 new.xml 中最新的 dbs 值提取到 merge.xml,并保留仅出现在 base.xml 中的 good-attribute 值:
merge.xml:
<element name="ind"
description="My desc"
dbId="someId"
moreAttr="someVal"
dbs="name1, name2, name4, name12, name3"
server="ServerName"
good-attribute="234" />
使用 paste 可以按字段并排显示,vimdiff 可以显示差异,但都不允许选择性地合并。是否有现成的内置工具支持,还是需要用 sed + awk 进行替换?
答案 0 :(得分:0)
这是Python中的直接实现:
#!/usr/bin/env python
# Python 2.5+ compatible
import sys
import xml.etree.ElementTree as etree
def main():
    """Merge the XML files named on the command line and write the result.

    Every argument after the script name is parsed with xml2dict, the
    parsed mappings are handed to merge_dicts, and the merged document is
    serialised to standard output by dict2xml.
    """
    # Use sys.stdout.buffer when the attribute exists, otherwise fall back
    # to sys.stdout itself.
    sink = getattr(sys.stdout, 'buffer', sys.stdout)
    parsed = [xml2dict(path) for path in sys.argv[1:]]
    merged = merge_dicts(*parsed)
    dict2xml(merged, sink)
def xml2dict(source):
    """Map each <element>'s ``name`` attribute to that element's attributes.

    source -- a filename or file object accepted by ``etree.parse``.

    Returns a dict keyed by the value of the ``name`` attribute; each value
    is the attribute dict of the corresponding <element>.  When several
    elements share a name, the last one in document order wins.
    """
    tree = etree.parse(source)
    # ElementTree.getiterator() was removed in Python 3.9; iter() is its
    # replacement (added in 2.7 / 3.2).  Fall back to getiterator() so that
    # older interpreters keep working.
    iter_elements = getattr(tree, 'iter', None) or tree.getiterator
    return dict((el.get('name'), el.attrib)
                for el in iter_elements('element'))
def merge_dicts(base, new):
    """Merge the attribute mappings of ``new`` over those of ``base``.

    base -- dict mapping an element name to its attribute dict.
    new -- dict of the same shape; its attribute values take precedence.

    Returns a new dict with one entry per element name found in ``new``.
    Each entry holds the attributes of the same-named ``base`` element (if
    there is one) overridden by the attributes from ``new``.  Elements that
    exist only in ``base`` are not included.  Neither argument is modified.
    """
    merged = {}
    # for each element from new xml
    for name, attr in new.items():
        # Copy before updating: calling update() directly on the dict
        # returned by base.get() would rewrite the caller's ``base`` data.
        combined = dict(base.get(name, {}))
        # combine attributes from base and new xmls preferring new values
        combined.update(attr)
        merged[name] = combined
    return merged
def dict2xml(d, sink):
    """Write the attribute dicts in ``d`` as <element> children of <root>.

    d -- dict whose values are attribute dicts; the keys are not written.
    sink -- filename or file object passed on to ``ElementTree.write``.

    The document is written with UTF-8 encoding.
    """
    document = etree.ElementTree(etree.Element('root'))
    parent = document.getroot()
    for _, attributes in d.items():
        etree.SubElement(parent, 'element', attributes)
    document.write(sink, encoding='utf-8')
# Run the merge only when executed as a script, so that importing this
# module does not read sys.argv or write to stdout as a side effect.
if __name__ == '__main__':
    main()
将此代码保存到 merge-xml 文件,并运行 chmod +x merge-xml。然后:
$ ./merge-xml base.xml new.xml >merge.xml
这是Python 2.4+兼容版本:
#!/usr/bin/env python
import sys
from xml.dom import minidom
def main():
    """Merge the XML files named on the command line and write the result.

    Every argument after the script name is parsed with xml2dict, the
    parsed mappings are handed to merge_dicts, and the merged document is
    serialised to standard output by dict2xml.
    """
    # Use sys.stdout.buffer when the attribute exists, otherwise fall back
    # to sys.stdout itself.
    sink = getattr(sys.stdout, 'buffer', sys.stdout)
    parsed = [xml2dict(path) for path in sys.argv[1:]]
    merged = merge_dicts(*parsed)
    dict2xml(merged, sink)
def xml2dict(source):
    """Map each <element>'s ``name`` attribute to that element's attributes.

    source -- a filename or file object accepted by ``minidom.parse``.

    Returns a dict keyed by the value of the ``name`` attribute; each value
    is the plain dict built by attr2dict from the element's attributes.
    When several elements share a name, the last one wins.
    """
    result = {}
    for element in minidom.parse(source).getElementsByTagName('element'):
        result[element.getAttribute('name')] = attr2dict(element.attributes)
    return result
def attr2dict(nodemap):
    """Convert a DOM attribute node map into a plain dict.

    nodemap -- object exposing ``length`` and ``item(i)``, such as the
    ``attributes`` of a minidom element.

    Returns a dict mapping each attribute's ``name`` to its ``value``.
    """
    nodes = (nodemap.item(index) for index in range(nodemap.length))
    return dict((node.name, node.value) for node in nodes)
def merge_dicts(base, new):
    """Merge the attribute mappings of ``new`` over those of ``base``.

    base -- dict mapping an element name to its attribute dict.
    new -- dict of the same shape; its attribute values take precedence.

    Returns a new dict with one entry per element name found in ``new``.
    Each entry holds the attributes of the same-named ``base`` element (if
    there is one) overridden by the attributes from ``new``.  Elements that
    exist only in ``base`` are not included.  Neither argument is modified.
    """
    merged = {}
    # for each element from new xml
    for name, attr in new.items():
        # Copy before updating: calling update() directly on the dict
        # returned by base.get() would rewrite the caller's ``base`` data.
        combined = dict(base.get(name, {}))
        # combine attributes from base and new xmls preferring new values
        combined.update(attr)
        merged[name] = combined
    return merged
def dict2xml(d, sink):
    """Write the attribute dicts in ``d`` as <element> children of <root>.

    d -- dict whose values are attribute dicts; the keys are not written.
    sink -- object with a ``write`` method; it receives the document that
    ``toprettyxml(encoding='utf-8')`` produces.
    """
    implementation = minidom.getDOMImplementation()
    document = implementation.createDocument(None, "root", None)
    parent = document.documentElement
    for _, attributes in d.items():
        element = document.createElement('element')
        for attr_name, attr_value in attributes.items():
            element.setAttribute(attr_name, attr_value)
        parent.appendChild(element)
    sink.write(document.toprettyxml(encoding='utf-8'))
# Run the merge only when executed as a script, so that importing this
# module does not read sys.argv or write to stdout as a side effect.
if __name__ == '__main__':
    main()