如何在 Python 中将某些特定节点从一个 XML 文件复制到另一个 XML 文件的指定层级

时间:2018-05-17 20:11:02

标签: python xml lxml

我正在尝试将一个 XML 文件合并到另一个 XML 文件中,但只需要把某些特定节点从 Source.xml 文件复制到 Destination.xml 文件。

这是源文件(ExampleSource.xml)——请注意这些数据按 "safetyreportid" 分组(将这些节点合并到目标文件时,它将作为我们的键):

<?xml version="1.0" encoding="UTF-8"?>
<drugs>
    <concmed safetyreportid="FR-10300423">
        <drug>
            <drugcharacterization>.</drugcharacterization>
            <medicinalproduct>Blinded study</medicinalproduct>
            <activesubstance>
                <activesubstancename> </activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
        <drug>
            <drugcharacterization>2</drugcharacterization>
            <medicinalproduct>METOTREXATE</medicinalproduct>
            <activesubstance>
                <activesubstancename>METHOTREXATE SODIUM</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
    </concmed>
    <concmed safetyreportid="BG-1010011">
        <drug>
            <drugcharacterization>1</drugcharacterization>
            <medicinalproduct>Medical Product</medicinalproduct>
            <activesubstance>
                <activesubstancename>ActiveSub</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
        <drug>
            <drugcharacterization>2</drugcharacterization>
            <medicinalproduct>Azerty</medicinalproduct>
            <activesubstance>
                <activesubstancename>POTATIUM</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
        <drug>
            <drugcharacterization>3</drugcharacterization>
            <medicinalproduct>Querty</medicinalproduct>
            <activesubstance>
                <activesubstancename>Plutonium</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
    </concmed>
</drugs>

下面是目标文件(ExampleDestination.xml)——其中的节点同样按 safetyreportid 组织:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ichicsr SYSTEM "http://eudravigilance.ema.europa.eu/dtd/icsr21xml.dtd">
<ichicsr lang="en">
    <ichicsrmessageheader>
        <messagetype>ichicsr</messagetype>
        <messageformatversion>2.1</messageformatversion>
    </ichicsrmessageheader>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>BG-1010011</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>GivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>FAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>CYSTITIS</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="BG-1010011">
            </concmed>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>FR-10300423</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>OtherGivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>DIARRHEA</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="FR-10300423">
            </concmed>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
</ichicsr>

我编写了下面的脚本,尝试将每个节点(及其子元素)复制到目标文件中对应 safetyreportid 的位置:

import xml.etree.ElementTree as ET
from lxml import etree

def find_child(node, with_ref):
    """Return the first descendant of ``node`` whose tag equals ``with_ref``.

    Children are examined in document order, and a child that does not match
    is searched recursively before its next sibling is looked at. ``node``
    itself is never tested. Returns ``None`` when no descendant has the tag.
    """
    for child in node:
        if child.tag == with_ref:
            return child
        if len(child) == 0:
            # Leaf element: nothing below it to search.
            continue
        match = find_child(child, with_ref)
        if match is not None:
            return match

    return None

def replace_node(from_tree, to_tree, node_ref):
    """Swap the first ``node_ref`` element of ``to_tree`` for the one in ``from_tree``.

    Both trees are searched with ``find_child`` starting at their roots. The
    element found in ``to_tree`` is removed and the element found in
    ``from_tree`` is inserted at the same index under the same parent.
    """
    source_root = from_tree.getroot()
    target_root = to_tree.getroot()
    replacement = find_child(source_root, node_ref)
    obsolete = find_child(target_root, node_ref)

    # Locate the slot in to_tree that the replacement has to occupy.
    holder, slot = get_node_parent_info(to_tree, obsolete)

    # Take the old element out, then put the new one in its place.
    holder.remove(obsolete)
    holder.insert(slot, replacement)

def get_node_parent_info(tree, node):
    """Locate ``node`` inside ``tree``.

    Returns a ``(parent, index)`` tuple where ``parent`` is the element of
    ``tree`` that directly contains ``node`` and ``index`` is the position of
    ``node`` among that parent's children. Raises ``KeyError`` when ``node``
    is not a child of any element in ``tree``.
    """
    # Map every child element in the tree to the element that contains it.
    child_to_parent = {}
    for candidate in tree.iter():
        for child in candidate:
            child_to_parent[child] = candidate

    owner = child_to_parent[node]
    position = list(owner).index(node)
    return owner, position


# Parse the destination with lxml; this copy is only used for the XPath queries below.
destinationFile = etree.parse("ExampleDestination.xml")

# Parse both files again with ElementTree; these are the trees passed to replace_node.
from_tree = ET.ElementTree(file='ExampleSource.xml')
to_tree = ET.ElementTree(file='ExampleDestination.xml')

# Every <safetyreport> element of the destination document.
safety_ref = destinationFile.xpath("//safetyreport")

for safetyreportid in safety_ref:
    # Text of the report's own <safetyreportid> child.
    xpath_safetyreportid = safetyreportid.xpath("./safetyreportid")
    local_safetyreportid = xpath_safetyreportid[0].text
    print(local_safetyreportid)
    # NOTE: local_safetyreportid is not passed on, so this call has the same
    # arguments on every iteration.
    replace_node(from_tree, to_tree, 'concmed')

# Serialize the ElementTree copy of the destination ('ouput.xml' is the name as written).
to_tree.write('ouput.xml')

但是,在输出的 ouput.xml 文件中,脚本并没有按各个 safetyreport 把 drug 节点复制到对应的位置。结果如下:

<ichicsr lang="en">
    <ichicsrmessageheader>
        <messagetype>ichicsr</messagetype>
        <messageformatversion>2.1</messageformatversion>
    </ichicsrmessageheader>
    <safetyreport>
        <safetyreportversion />
        <safetyreportid>BG-1010011</safetyreportid>
        <primarysource>
            <reportertitle />
            <reportergivename>GivenName</reportergivename>
            <reportermiddlename />
            <reporterfamilyname>FAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype />
            <senderorganization />
        </sender>
        <receiver>
            <receivertype />
            <receiverorganization />
        </receiver>
        <patient>
            <patientinitial />
            <patientgpmedicalrecordnumb />
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>CYSTITIS</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="FR-10300423">
                <drug>
                    <drugcharacterization>.</drugcharacterization>
                    <medicinalproduct>Blinded study</medicinalproduct>
                    <activesubstance>
                        <activesubstancename> </activesubstancename>
                    </activesubstance>
                    <drugreactionrelatedness>
                        <drugreactionassesmeddraversion />
                        <drugreactionasses />
                    </drugreactionrelatedness>
                </drug>
                <drug>
                    <drugcharacterization>2</drugcharacterization>
                    <medicinalproduct>METOTREXATE</medicinalproduct>
                    <activesubstance>
                        <activesubstancename>METHOTREXATE SODIUM</activesubstancename>
                    </activesubstance>
                    <drugreactionrelatedness>
                        <drugreactionassesmeddraversion />
                        <drugreactionasses />
                    </drugreactionrelatedness>
                </drug>
            </concmed>
            <summary>
                <narrativeincludeclinical />
                <reportercomment />
            </summary>
        </patient>
    </safetyreport>
    <safetyreport>
        <safetyreportversion />
        <safetyreportid>FR-10300423</safetyreportid>
        <primarysource>
            <reportertitle />
            <reportergivename>OtherGivenName</reportergivename>
            <reportermiddlename />
            <reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype />
            <senderorganization />
        </sender>
        <receiver>
            <receivertype />
            <receiverorganization />
        </receiver>
        <patient>
            <patientinitial />
            <patientgpmedicalrecordnumb />
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>DIARRHEA</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="FR-10300423">
            </concmed>
            <summary>
                <narrativeincludeclinical />
                <reportercomment />
            </summary>
        </patient>
    </safetyreport>
</ichicsr>

这是我的预期输出:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ichicsr SYSTEM "http://eudravigilance.ema.europa.eu/dtd/icsr21xml.dtd">
<ichicsr lang="en">
    <ichicsrmessageheader>
        <messagetype>ichicsr</messagetype>
        <messageformatversion>2.1</messageformatversion>
    </ichicsrmessageheader>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>BG-1010011</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>GivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>FAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>CYSTITIS</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <drug>
                <drugcharacterization>1</drugcharacterization>
                <medicinalproduct>Medical Product</medicinalproduct>
                <activesubstance>
                    <activesubstancename>ActiveSub</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <drug>
                <drugcharacterization>2</drugcharacterization>
                <medicinalproduct>Azerty</medicinalproduct>
                <activesubstance>
                    <activesubstancename>POTATIUM</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <drug>
                <drugcharacterization>3</drugcharacterization>
                <medicinalproduct>Querty</medicinalproduct>
                <activesubstance>
                    <activesubstancename>Plutonium</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>FR-10300423</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>OtherGivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>DIARRHEA</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <drug>
                <drugcharacterization>.</drugcharacterization>
                <medicinalproduct>Blinded study</medicinalproduct>
                <activesubstance>
                    <activesubstancename> </activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <drug>
                <drugcharacterization>2</drugcharacterization>
                <medicinalproduct>METOTREXATE</medicinalproduct>
                <activesubstance>
                    <activesubstancename>METHOTREXATE SODIUM</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
</ichicsr>

希望有人能帮我解决这个合并问题……提前感谢您的阅读和帮助。来自法国的问候

1 个答案:

答案 0 :(得分:0)

下面是一个解决方案,目前在我这里可以正常工作——希望也能帮到其他人……

import xml.etree.ElementTree as ET
from lxml import etree

def find_child(node, with_ref, with_safetyreportid):
    """Return the first matching descendant of ``node``, or ``None``.

    A descendant matches when its tag equals ``with_ref`` and it carries a
    ``safetyreportid`` attribute equal to ``with_safetyreportid``. Children
    are examined in document order, and a non-matching child is searched
    recursively before its next sibling is looked at. ``node`` itself is
    never tested.

    Args:
        node: element whose descendants are searched.
        with_ref: tag name to look for.
        with_safetyreportid: required value of the ``safetyreportid`` attribute.

    Returns:
        The first matching element, or ``None`` if there is none.
    """
    for element in node:
        # An element with the right tag but no safetyreportid attribute is
        # simply not a match; indexing attrib[...] directly would raise
        # KeyError and abort the whole search.
        is_match = (
            element.tag == with_ref
            and 'safetyreportid' in element.attrib
            and element.attrib['safetyreportid'] == with_safetyreportid
        )
        if is_match:
            return element
        if len(element):
            sub_result = find_child(element, with_ref, with_safetyreportid)
            if sub_result is not None:
                return sub_result

    return None

def replace_node(from_tree, to_tree, node_ref, with_safetyreportid):
    """Replace a node of ``to_tree`` with its counterpart from ``from_tree``.

    The node is identified in both trees by ``find_child`` using the tag
    ``node_ref`` and the ``safetyreportid`` value ``with_safetyreportid``.
    The element found in ``from_tree`` is inserted as-is (it is not copied)
    at the index previously held by the element found in ``to_tree``.

    Args:
        from_tree: tree providing the replacement node.
        to_tree: tree that is modified in place.
        node_ref: tag name of the node to replace.
        with_safetyreportid: ``safetyreportid`` value selecting the node.

    Raises:
        KeyError: if either tree has no matching node. Both lookups are
            checked before ``to_tree`` is touched, so a failed call leaves
            it unmodified.
    """
    from_node = find_child(from_tree.getroot(), node_ref, with_safetyreportid)
    to_node = find_child(to_tree.getroot(), node_ref, with_safetyreportid)

    # Validate both lookups up front. Without this, a missing destination
    # node fails later with an opaque "KeyError: None", and a missing source
    # node fails only after the destination node has already been removed.
    if to_node is None:
        raise KeyError(
            "no <{0}> with safetyreportid={1!r} in destination tree".format(
                node_ref, with_safetyreportid
            )
        )
    if from_node is None:
        raise KeyError(
            "no <{0}> with safetyreportid={1!r} in source tree".format(
                node_ref, with_safetyreportid
            )
        )

    # Find where to substitute the from_node into the to_tree
    to_parent, to_index = get_node_parent_info(to_tree, to_node)

    # Replace to_node with from_node
    to_parent.remove(to_node)
    to_parent.insert(to_index, from_node)

def get_node_parent_info(tree, node):
    """Find the parent of ``node`` in ``tree`` and the node's position under it.

    Returns ``(parent, index)``: ``parent`` is the element of ``tree`` that
    directly contains ``node``, and ``index`` is where ``node`` sits in that
    parent's child list. Raises ``KeyError`` when ``node`` is not a child of
    any element in ``tree``.
    """
    # Build a child -> parent lookup covering every element of the tree.
    parent_of = {}
    for container in tree.iter():
        parent_of.update((child, container) for child in container)

    container = parent_of[node]
    siblings = list(container)
    return container, siblings.index(node)


# Parse both documents with lxml; these copies are only used for the XPath queries below.
destinationFile = etree.parse("ExampleDestination.xml")
sourceFile = etree.parse("ExampleSource.xml")

# Parse both files again with ElementTree; these are the trees passed to replace_node.
from_tree = ET.ElementTree(file='ExampleSource.xml')
to_tree = ET.ElementTree(file='ExampleDestination.xml')

# All <concmed> elements of each document.
# NOTE(review): safety_dest_ref is not used again in the code shown here.
safety_dest_ref = destinationFile.xpath("//concmed")
safety_sour_ref = sourceFile.xpath("//concmed")

# Drive the merge from the source: one replace_node call per source <concmed>.
for each_source_concmed in safety_sour_ref:
    # xpath(".") selects the context node itself, so element [0] is each_source_concmed.
    xpath_source_concmed = each_source_concmed.xpath(".")
    pat_ref_source = xpath_source_concmed[0].attrib['safetyreportid']
    print(pat_ref_source)
    replace_node(from_tree, to_tree, 'concmed', pat_ref_source)

# Write the modified ElementTree destination tree ('ouput.xml' is the name as written).
to_tree.write('ouput.xml')