我正在尝试把一个 XML 文件合并到另一个 XML 文件中:需要把 Source.xml 文件中的某些特定节点复制到 Destination.xml 文件中。
这是源文件(ExampleSource.xml)。请注意,这些数据按 "safetyreportid" 分组(把这些节点合并到目标文件时,它就是我们用来匹配的键):
<?xml version="1.0" encoding="UTF-8"?>
<drugs>
<concmed safetyreportid="FR-10300423">
<drug>
<drugcharacterization>.</drugcharacterization>
<medicinalproduct>Blinded study</medicinalproduct>
<activesubstance>
<activesubstancename> </activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<drug>
<drugcharacterization>2</drugcharacterization>
<medicinalproduct>METOTREXATE</medicinalproduct>
<activesubstance>
<activesubstancename>METHOTREXATE SODIUM</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
</concmed>
<concmed safetyreportid="BG-1010011">
<drug>
<drugcharacterization>1</drugcharacterization>
<medicinalproduct>Medical Product</medicinalproduct>
<activesubstance>
<activesubstancename>ActiveSub</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<drug>
<drugcharacterization>2</drugcharacterization>
<medicinalproduct>Azerty</medicinalproduct>
<activesubstance>
<activesubstancename>POTATIUM</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<drug>
<drugcharacterization>3</drugcharacterization>
<medicinalproduct>Querty</medicinalproduct>
<activesubstance>
<activesubstancename>Plutonium</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
</concmed>
</drugs>
下面是目标文件(ExampleDestination.xml),其中的节点同样按 safetyreportid 组织:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ichicsr SYSTEM "http://eudravigilance.ema.europa.eu/dtd/icsr21xml.dtd">
<ichicsr lang="en">
<ichicsrmessageheader>
<messagetype>ichicsr</messagetype>
<messageformatversion>2.1</messageformatversion>
</ichicsrmessageheader>
<safetyreport>
<safetyreportversion/>
<safetyreportid>BG-1010011</safetyreportid>
<primarysource>
<reportertitle/>
<reportergivename>GivenName</reportergivename>
<reportermiddlename></reportermiddlename>
<reporterfamilyname>FAMILYNAME</reporterfamilyname>
</primarysource>
<sender>
<sendertype></sendertype>
<senderorganization></senderorganization>
</sender>
<receiver>
<receivertype></receivertype>
<receiverorganization></receiverorganization>
</receiver>
<patient>
<patientinitial></patientinitial>
<patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
<medicalhistoryepisode>
</medicalhistoryepisode>
<reaction>
<primarysourcereaction>CYSTITIS</primarysourcereaction>
<reactionmeddraversionllt>20.1</reactionmeddraversionllt>
</reaction>
<concmed safetyreportid="BG-1010011">
</concmed>
<summary>
<narrativeincludeclinical></narrativeincludeclinical>
<reportercomment></reportercomment>
</summary>
</patient>
</safetyreport>
<safetyreport>
<safetyreportversion/>
<safetyreportid>FR-10300423</safetyreportid>
<primarysource>
<reportertitle/>
<reportergivename>OtherGivenName</reportergivename>
<reportermiddlename></reportermiddlename>
<reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
</primarysource>
<sender>
<sendertype></sendertype>
<senderorganization></senderorganization>
</sender>
<receiver>
<receivertype></receivertype>
<receiverorganization></receiverorganization>
</receiver>
<patient>
<patientinitial></patientinitial>
<patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
<medicalhistoryepisode>
</medicalhistoryepisode>
<reaction>
<primarysourcereaction>DIARRHEA</primarysourcereaction>
<reactionmeddraversionllt>20.1</reactionmeddraversionllt>
</reaction>
<concmed safetyreportid="FR-10300423">
</concmed>
<summary>
<narrativeincludeclinical></narrativeincludeclinical>
<reportercomment></reportercomment>
</summary>
</patient>
</safetyreport>
</ichicsr>
我编写了下面的脚本,尝试把每个节点(及其子元素)复制到目标文件中与 safetyreportid 对应的位置:
import xml.etree.ElementTree as ET
from lxml import etree
def find_child(node, with_ref):
    """Return the first descendant of ``node`` whose tag equals ``with_ref``.

    Descendants are visited in document order (depth-first, parents before
    their children). ``node`` itself is never a candidate. Returns ``None``
    when no descendant carries the requested tag.
    """
    descendants = node.iter()
    # ``iter()`` yields ``node`` first; drop it so only descendants are tested.
    next(descendants, None)
    return next(
        (candidate for candidate in descendants if candidate.tag == with_ref),
        None,
    )
def replace_node(from_tree, to_tree, node_ref):
    """Swap a ``node_ref`` element of ``to_tree`` for the one in ``from_tree``.

    Both elements are looked up by tag only, so each lookup yields the first
    element with that tag in document order. The element taken from
    ``from_tree`` is placed at the position the destination element occupied
    under its parent; ``to_tree`` is modified in place and nothing is returned.
    """
    replacement = find_child(from_tree.getroot(), node_ref)
    target = find_child(to_tree.getroot(), node_ref)

    # Remember the slot of the outgoing element before detaching it, so the
    # incoming element ends up at exactly the same position.
    holder, slot = get_node_parent_info(to_tree, target)
    holder.remove(target)
    holder.insert(slot, replacement)
def get_node_parent_info(tree, node):
    """Locate ``node`` inside ``tree``.

    Returns a ``(parent, index)`` tuple where ``parent`` is the element that
    contains ``node`` and ``index`` is the position of ``node`` among the
    children of ``parent``. Raises ``KeyError`` when ``node`` is not the child
    of any element of ``tree`` (which includes the root element).
    """
    # Elements hold no reference to their parent, so build a child -> parent
    # lookup by walking the whole tree.
    parent_by_child = {}
    for candidate in tree.iter():
        for child in candidate:
            parent_by_child[child] = candidate

    owner = parent_by_child[node]
    position = list(owner).index(node)
    return owner, position
# The destination is parsed twice: with lxml for the XPath queries below and
# with ElementTree for the tree that is actually edited and written out.
destinationFile = etree.parse("ExampleDestination.xml")
from_tree = ET.ElementTree(file='ExampleSource.xml')
to_tree = ET.ElementTree(file='ExampleDestination.xml')

# Walk every <safetyreport> of the destination and print its id.
for report in destinationFile.xpath("//safetyreport"):
    id_nodes = report.xpath("./safetyreportid")
    print(id_nodes[0].text)
    # The id is not passed on: the replacement is looked up by tag name only.
    replace_node(from_tree, to_tree, 'concmed')

to_tree.write('ouput.xml')
但是在生成的 ouput.xml 文件中,脚本并没有按各个 safetyreport 把 drug 节点复制到对应的位置。结果如下:
<ichicsr lang="en">
<ichicsrmessageheader>
<messagetype>ichicsr</messagetype>
<messageformatversion>2.1</messageformatversion>
</ichicsrmessageheader>
<safetyreport>
<safetyreportversion />
<safetyreportid>BG-1010011</safetyreportid>
<primarysource>
<reportertitle />
<reportergivename>GivenName</reportergivename>
<reportermiddlename />
<reporterfamilyname>FAMILYNAME</reporterfamilyname>
</primarysource>
<sender>
<sendertype />
<senderorganization />
</sender>
<receiver>
<receivertype />
<receiverorganization />
</receiver>
<patient>
<patientinitial />
<patientgpmedicalrecordnumb />
<medicalhistoryepisode>
</medicalhistoryepisode>
<reaction>
<primarysourcereaction>CYSTITIS</primarysourcereaction>
<reactionmeddraversionllt>20.1</reactionmeddraversionllt>
</reaction>
<concmed safetyreportid="FR-10300423">
<drug>
<drugcharacterization>.</drugcharacterization>
<medicinalproduct>Blinded study</medicinalproduct>
<activesubstance>
<activesubstancename> </activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion />
<drugreactionasses />
</drugreactionrelatedness>
</drug>
<drug>
<drugcharacterization>2</drugcharacterization>
<medicinalproduct>METOTREXATE</medicinalproduct>
<activesubstance>
<activesubstancename>METHOTREXATE SODIUM</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion />
<drugreactionasses />
</drugreactionrelatedness>
</drug>
</concmed>
<summary>
<narrativeincludeclinical />
<reportercomment />
</summary>
</patient>
</safetyreport>
<safetyreport>
<safetyreportversion />
<safetyreportid>FR-10300423</safetyreportid>
<primarysource>
<reportertitle />
<reportergivename>OtherGivenName</reportergivename>
<reportermiddlename />
<reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
</primarysource>
<sender>
<sendertype />
<senderorganization />
</sender>
<receiver>
<receivertype />
<receiverorganization />
</receiver>
<patient>
<patientinitial />
<patientgpmedicalrecordnumb />
<medicalhistoryepisode>
</medicalhistoryepisode>
<reaction>
<primarysourcereaction>DIARRHEA</primarysourcereaction>
<reactionmeddraversionllt>20.1</reactionmeddraversionllt>
</reaction>
<concmed safetyreportid="FR-10300423">
</concmed>
<summary>
<narrativeincludeclinical />
<reportercomment />
</summary>
</patient>
</safetyreport>
</ichicsr>
这是我的预期输出:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ichicsr SYSTEM "http://eudravigilance.ema.europa.eu/dtd/icsr21xml.dtd">
<ichicsr lang="en">
<ichicsrmessageheader>
<messagetype>ichicsr</messagetype>
<messageformatversion>2.1</messageformatversion>
</ichicsrmessageheader>
<safetyreport>
<safetyreportversion/>
<safetyreportid>BG-1010011</safetyreportid>
<primarysource>
<reportertitle/>
<reportergivename>GivenName</reportergivename>
<reportermiddlename></reportermiddlename>
<reporterfamilyname>FAMILYNAME</reporterfamilyname>
</primarysource>
<sender>
<sendertype></sendertype>
<senderorganization></senderorganization>
</sender>
<receiver>
<receivertype></receivertype>
<receiverorganization></receiverorganization>
</receiver>
<patient>
<patientinitial></patientinitial>
<patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
<medicalhistoryepisode>
</medicalhistoryepisode>
<reaction>
<primarysourcereaction>CYSTITIS</primarysourcereaction>
<reactionmeddraversionllt>20.1</reactionmeddraversionllt>
</reaction>
<drug>
<drugcharacterization>1</drugcharacterization>
<medicinalproduct>Medical Product</medicinalproduct>
<activesubstance>
<activesubstancename>ActiveSub</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<drug>
<drugcharacterization>2</drugcharacterization>
<medicinalproduct>Azerty</medicinalproduct>
<activesubstance>
<activesubstancename>POTATIUM</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<drug>
<drugcharacterization>3</drugcharacterization>
<medicinalproduct>Querty</medicinalproduct>
<activesubstance>
<activesubstancename>Plutonium</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<summary>
<narrativeincludeclinical></narrativeincludeclinical>
<reportercomment></reportercomment>
</summary>
</patient>
</safetyreport>
<safetyreport>
<safetyreportversion/>
<safetyreportid>FR-10300423</safetyreportid>
<primarysource>
<reportertitle/>
<reportergivename>OtherGivenName</reportergivename>
<reportermiddlename></reportermiddlename>
<reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
</primarysource>
<sender>
<sendertype></sendertype>
<senderorganization></senderorganization>
</sender>
<receiver>
<receivertype></receivertype>
<receiverorganization></receiverorganization>
</receiver>
<patient>
<patientinitial></patientinitial>
<patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
<medicalhistoryepisode>
</medicalhistoryepisode>
<reaction>
<primarysourcereaction>DIARRHEA</primarysourcereaction>
<reactionmeddraversionllt>20.1</reactionmeddraversionllt>
</reaction>
<drug>
<drugcharacterization>.</drugcharacterization>
<medicinalproduct>Blinded study</medicinalproduct>
<activesubstance>
<activesubstancename> </activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<drug>
<drugcharacterization>2</drugcharacterization>
<medicinalproduct>METOTREXATE</medicinalproduct>
<activesubstance>
<activesubstancename>METHOTREXATE SODIUM</activesubstancename>
</activesubstance>
<drugreactionrelatedness>
<drugreactionassesmeddraversion></drugreactionassesmeddraversion>
<drugreactionasses></drugreactionasses>
</drugreactionrelatedness>
</drug>
<summary>
<narrativeincludeclinical></narrativeincludeclinical>
<reportercomment></reportercomment>
</summary>
</patient>
</safetyreport>
</ichicsr>
希望有人能帮我解决这个合并问题……提前感谢您的阅读和帮助。来自法国的问候
答案 0 :(得分:0)
下面是一个解决方案,目前在我这里可以正常工作,或许也能帮到其他人……
import xml.etree.ElementTree as ET
from lxml import etree
def find_child(node, with_ref, with_safetyreportid):
    """Find a descendant of ``node`` by tag and ``safetyreportid`` attribute.

    Descendants are visited in document order (depth-first, parents before
    their children); ``node`` itself is never a candidate.

    Args:
        node: Element whose descendants are searched.
        with_ref: Tag name the wanted element must have.
        with_safetyreportid: Value its ``safetyreportid`` attribute must have.

    Returns:
        The first matching element, or ``None`` when there is none.
    """
    descendants = node.iter()
    # ``iter()`` yields ``node`` first; drop it so only descendants are tested.
    next(descendants, None)
    for element in descendants:
        if element.tag != with_ref:
            continue
        # ``get`` instead of ``attrib[...]``: an element with the right tag but
        # no ``safetyreportid`` attribute is read as ``None`` instead of
        # aborting the whole search with a KeyError.
        if element.get('safetyreportid') == with_safetyreportid:
            return element
    return None
def replace_node(from_tree, to_tree, node_ref, with_safetyreportid):
    """Replace a node of ``to_tree`` with its counterpart from ``from_tree``.

    The node is identified in both trees by its tag (``node_ref``) and its
    ``safetyreportid`` attribute. The element from ``from_tree`` takes the
    position the destination element had under its parent. ``to_tree`` is
    modified in place.

    Args:
        from_tree: Tree providing the replacement element.
        to_tree: Tree in which the element is replaced.
        node_ref: Tag name of the element to replace.
        with_safetyreportid: ``safetyreportid`` value selecting the element.

    Raises:
        KeyError: If either tree has no matching element. Both lookups are
            checked before anything is changed, so ``to_tree`` is left
            untouched on failure.
    """
    from_node = find_child(from_tree.getroot(), node_ref, with_safetyreportid)
    to_node = find_child(to_tree.getroot(), node_ref, with_safetyreportid)

    # Validate first: removing the destination node and only then discovering
    # that there is nothing to insert would leave the tree half-edited.
    if from_node is None:
        raise KeyError(
            "no <%s safetyreportid=%r> in the source tree"
            % (node_ref, with_safetyreportid)
        )
    if to_node is None:
        raise KeyError(
            "no <%s safetyreportid=%r> in the destination tree"
            % (node_ref, with_safetyreportid)
        )

    # Find where to substitute the from_node into the to_tree.
    to_parent, to_index = get_node_parent_info(to_tree, to_node)

    # Replace to_node with from_node at the same position.
    to_parent.remove(to_node)
    to_parent.insert(to_index, from_node)
def get_node_parent_info(tree, node):
    """Find the parent of ``node`` in ``tree`` and the position of ``node``.

    Returns:
        A ``(parent, index)`` tuple: ``parent`` is the element containing
        ``node`` and ``index`` is where ``node`` sits among its children.

    Raises:
        KeyError: If no element of ``tree`` has ``node`` as a child (this
            includes the root element).
    """
    parent_of = {}
    for holder in tree.iter():
        # Map every direct child of ``holder`` back to ``holder``.
        parent_of.update(dict.fromkeys(holder, holder))

    found = parent_of[node]
    siblings = list(found)
    return found, siblings.index(node)
# Merge every <concmed> group of the source file into the destination file,
# matching the groups by their safetyreportid attribute.
# Each file is parsed once, with ElementTree; a second lxml parse is not
# needed just to enumerate the <concmed> elements.
from_tree = ET.ElementTree(file='ExampleSource.xml')
to_tree = ET.ElementTree(file='ExampleDestination.xml')

# Snapshot the source groups up front so the loop does not iterate lazily
# while elements are being attached to the destination tree.
for each_source_concmed in list(from_tree.iter('concmed')):
    pat_ref_source = each_source_concmed.attrib['safetyreportid']
    print(pat_ref_source)
    replace_node(from_tree, to_tree, 'concmed', pat_ref_source)

# NOTE: ElementTree does not keep the DOCTYPE of the parsed file, and with
# these default arguments write() emits no XML declaration, so the output
# starts directly with the root element.
to_tree.write('ouput.xml')