XSLT - 比较和组合两个XML文件

时间:2017-06-14 13:22:59

标签: xml xslt xpath compare

我有一个非常复杂的XML文件和一个更简单的XML文件,其中包含对复杂文件的一些修正。

复杂文件中的一个元素:

<h1:Document DocKey="obj     40020528">
      <h1:Block Type="obj"
                CreationDate="20.03.2014 09:39:50"
                CreatorID="Admin"
                ChangeDate="21.01.2015 14:40:51"
                ChangerID="Admin"
                OwnerID="Admin"
                FieldsCount="17">
         <h1:Field Type="5000" Value="40020528"/>
         <h1:Field Type="5060" Value="Aufnahme">
            <h1:Field Type="5064" Value="unbekannt"/>
         </h1:Field>
         <h1:Field Type="ob28" Value="Verwalter">
            <h1:Field Type="2864" Value="Köln"/>
            <h1:Field Type="2900"
                      Value="Theaterwissenschaftliche Sammlung, Universität zu Köln"/>
            <h1:Field Type="2930" Value="Fotoabteilung"/>
            <h1:Field Type="2950" Value="TWS_FGL00541"/>
            <h1:Field Type="907d" Value="No: 4260"/>
         </h1:Field>
         <h1:Field Type="ob26" Value="Aufnahmeort">
            <h1:Field Type="2664" Value="unbekannt"/>
            <h1:Field Type="2690" Value="Theater"/>
            <h1:Field Type="2700" Value="Theater unbekannt"/>
         </h1:Field>
         <h1:Field Type="ob30" Value="Herstellung">
            <h1:Field Type="3100" Value="unbekannt"/>
            <h1:Field Type="3475" Value="Fotograf/in, Atelier"/>
         </h1:Field>
         <h1:Field Type="5007" Value="Darstellung">
            <h1:Field Type="5009" Value="unbekannt"/>
            <h1:Field Type="5010" Value="Autor/in"/>
            <h1:Field Type="5013" Value="xTITELx"/>
         </h1:Field>
         <h1:Field Type="ob40" Value="Inszenierung">
            <h1:Field Type="4100" Value="unbekannt"/>
            <h1:Field Type="4475" Value="Regie"/>
         </h1:Field>
         <h1:Field Type="ob40" Value="Inszenierung">
            <h1:Field Type="4100" Value="unbekannt"/>
            <h1:Field Type="4475" Value="Bühnenbild"/>
         </h1:Field>
         <h1:Field Type="5200" Value="Fritz Feinhals"/>
         <h1:Field Type="5220" Value="Fotografie"/>
         <h1:Field Type="5230" Value="Negativ"/>
         <h1:Field Type="5240" Value="Glasplattennegativ"/>
         <h1:Field Type="5360" Value="18x13"/>
         <h1:Field Type="55th" Value="Feinhals, Fritz"/>
      </h1:Block>
   </h1:Document>

简单文件中的一个元素:

<signatur id="TWS_FGL00541">
      <datum/>
      <ort/>
      <ortsteil/>
      <titel>Fritz Feinhals als Wotan</titel>
      <fotograf/>
      <komponist>Wagner, Richard</komponist>
      <author/>
      <regie/>
      <buehnenbild/>
      <darPerson/>
      <freiText>Walküre</freiText>
   </signatur>

我试图按照"Signatur"(签名)来比较两个文件中的元素。如果签名相同,则说明存在更正。如果存在更正,就必须检查简单XML中的哪些子元素需要放入复杂元素中:也就是检查简单元素的各个子元素是否有值,如果有,就必须把该值放入复杂元素的对应字段中。

我不知道,这样做的最佳方法是什么。我正在尝试如下的事情:

<!-- Identity transform ("alles" = "everything"): copies every node and attribute
     unchanged unless a more specific template matches it. -->
<xsl:template match="@* | node()" name="alles">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>

<!-- The second, simpler XML file, loaded from 'simpleOne.xml'. -->
<xsl:variable name="Halft" select="document('simpleOne.xml')" />

<!-- NOTE(review): "preceding-sibling::$Halft" is not valid XPath: an axis must be followed
     by a node test, not by a variable reference. -->
<xsl:variable name="Sig_Halft" select="preceding-sibling::$Halft/document/signatur" />
<!-- Selects the Value attribute of the Type 2950 field inside the ob28 field.
     NOTE(review): the element names are unprefixed here, while the sample document uses
     h1:-prefixed elements; verify the namespace handling. -->
<xsl:variable name="Sig_HiDA" select="preceding-sibling::DocumentSet/Document/Block/Field[@Type='ob28']/Field[@Type='2950']/@Value" />


<!-- <xsl:variable name="" select="" /> -->

<!-- Asker's attempt at the correction step for one Document (quoted from the question).
     NOTE(review): the match pattern and all paths use unprefixed element names, while the
     sample document uses h1:-prefixed elements; verify the namespace handling.
     NOTE(review): the paths inside the template start again at DocumentSet/Document
     although the context node is already a Document; they are evaluated relative to it.
     NOTE(review): each test joins "Value != ''" and "Value != correction" with "or";
     confirm that this is the intended condition.
     Of the field checks, only the Titel branch produces an element. -->
<xsl:template match="DocumentSet/Document">
    <xsl:if test="$Sig_HiDA = $Sig_Halft"> <!-- if the signatures are equal -->
        xcbcvncvn
        <!-- Date -->
        <xsl:if test="((DocumentSet/Document/Block/Field[@Type='5060']/Field[@Type='5064']/@Value != '') or (DocumentSet/Document/Block/Field[@Type='5060']/Field[@Type='5064']/@Value != $Halft/document/signatur/datum))">

        </xsl:if>

        <!-- Place -->
        <xsl:if test="((DocumentSet/Document/Block/Field[@Type='ob26']/Field[@Type='2664']/@Value != '') or (DocumentSet/Document/Block/Field[@Type='ob26']/Field[@Type='2664']/@Value != $Halft/document/signatur/ort))">

        </xsl:if>

        <!-- District (theatre) -->
        <xsl:if test="((DocumentSet/Document/Block/Field[@Type='ob26']/Field[@Type='2700']/@Value != '') or (DocumentSet/Document/Block/Field[@Type='ob26']/Field[@Type='2700']/@Value != $Halft/document/signatur/ortsteil))">

        </xsl:if>

        <!-- Title -->
        <xsl:if test="((DocumentSet/Document/Block/Field[@Type='5200']/@Value != '') or (DocumentSet/Document/Block/Field[@Type='5200']/@Value != $Halft/document/signatur/titel))">
            <h1:Field Type="5200" Value="{$Halft/document/signatur/titel}"/>
        </xsl:if>

        <!-- Photographer -->
        <xsl:if test="((DocumentSet/Document/Block/Field[@Type='ob30']/Field[@Type='3100']/@Value != '') or (DocumentSet/Document/Block/Field[@Type='ob30']/Field[@Type='3100']/@Value != $Halft/document/signatur/fotograf))">

        </xsl:if>

        <!-- Composer -->
        <xsl:if test="((DocumentSet/Document/Block/Field[@Type='5007']/Field[@Type='5009']/@Value != '') or (DocumentSet/Document/Block/Field[@Type='5007']/Field[@Type='5009']/@Value != $Halft/document/signatur/komponist))">

        </xsl:if>
    </xsl:if>
 </xsl:template>

1 个答案:

答案 0 :(得分:0)

以下XSLT

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="xs"
    version="2.0">

    <!-- Identity transform: anything not handled by a more specific template is copied
         through unchanged, attributes first, then child nodes. -->
    <xsl:template match="@* | node()" name="identity">
        <xsl:copy>
            <xsl:apply-templates select="@* | node()"/>
        </xsl:copy>
    </xsl:template>

    <!-- Correction data: the document loaded from 'simpleOne.xml'. -->
    <xsl:variable name="rework" select="document('simpleOne.xml')"/>

    <!-- Handles every Document whose signature field (Type 2950) equals the id of a
         signatur in the correction data; Documents without such a counterpart are not
         matched here and fall through to the other templates. The element is rebuilt
         with its attributes, and the matching signatur is handed down to all descendant
         templates as the tunnel parameter currentMergeInfo. -->
    <xsl:template match="*:Document[.//*:Field[@Type = '2950']/@Value = $rework//*:signatur/@id]">
        <xsl:variable name="signaturId" select=".//*:Field[@Type = '2950']/@Value"/>
        <xsl:copy>
            <xsl:copy-of select="@*"/>
            <xsl:apply-templates select="node()">
                <xsl:with-param name="currentMergeInfo"
                                select="$rework//*:signatur[@id = $signaturId]"
                                as="node()"
                                tunnel="yes"/>
            </xsl:apply-templates>
        </xsl:copy>
    </xsl:template>

    <!-- Template for the actual replacement of a value: the photographer field (Type 3475).
         Its Value attribute is replaced by the text of the fotograf element of the signatur
         handed down in the tunnel parameter currentMergeInfo. The field is copied unchanged
         when no signatur was handed down, or when its fotograf element carries no text, so
         that an empty correction never wipes out an existing value. -->
    <xsl:template match="*:Field[@Type='3475']">
        <!-- Defaults to the empty sequence when no signatur was handed down. -->
        <xsl:param name="currentMergeInfo" select="()" tunnel="yes"/>
        <!-- Only fotograf elements that contain more than whitespace count as a correction. -->
        <xsl:variable name="newValue" select="$currentMergeInfo//*:fotograf[normalize-space()]"/>
        <xsl:choose>
            <xsl:when test="$newValue">
                <xsl:copy>
                    <xsl:copy-of select="@* except @Value"/>
                    <!-- outputting the new value -->
                    <xsl:attribute name="Value">
                        <xsl:value-of select="$newValue"/>
                    </xsl:attribute>
                </xsl:copy>
            </xsl:when>
            <!-- no usable correction: just copy what is already there -->
            <xsl:otherwise>
                <xsl:copy-of select="."/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

</xsl:stylesheet>

转换您的(略微扩展的)输入

<?xml version="1.0" encoding="UTF-8"?>
<h1:DocumentSet xmlns:h1="someNamespace">
    <h1:Document DocKey="obj     40020528">
        <h1:Block Type="obj"
            CreationDate="20.03.2014 09:39:50"
            CreatorID="Admin"
            ChangeDate="21.01.2015 14:40:51"
            ChangerID="Admin"
            OwnerID="Admin"
            FieldsCount="17">
            <h1:Field Type="5000" Value="40020528"/>
            <h1:Field Type="5060" Value="Aufnahme">
                <h1:Field Type="5064" Value="unbekannt"/>
            </h1:Field>
            <h1:Field Type="ob28" Value="Verwalter">
                <h1:Field Type="2864" Value="Köln"/>
                <h1:Field Type="2900"
                    Value="Theaterwissenschaftliche Sammlung, Universität zu Köln"/>
                <h1:Field Type="2930" Value="Fotoabteilung"/>
                <h1:Field Type="2950" Value="TWS_FGL00541"/>
                <h1:Field Type="907d" Value="No: 4260"/>
            </h1:Field>
            <h1:Field Type="ob26" Value="Aufnahmeort">
                <h1:Field Type="2664" Value="unbekannt"/>
                <h1:Field Type="2690" Value="Theater"/>
                <h1:Field Type="2700" Value="Theater unbekannt"/>
            </h1:Field>
            <h1:Field Type="ob30" Value="Herstellung">
                <h1:Field Type="3100" Value="unbekannt"/>
                <h1:Field Type="3475" Value="Fotograf/in, Atelier"/>
            </h1:Field>
            <h1:Field Type="5007" Value="Darstellung">
                <h1:Field Type="5009" Value="unbekannt"/>
                <h1:Field Type="5010" Value="Autor/in"/>
                <h1:Field Type="5013" Value="xTITELx"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Regie"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Bühnenbild"/>
            </h1:Field>
            <h1:Field Type="5200" Value="Fritz Feinhals"/>
            <h1:Field Type="5220" Value="Fotografie"/>
            <h1:Field Type="5230" Value="Negativ"/>
            <h1:Field Type="5240" Value="Glasplattennegativ"/>
            <h1:Field Type="5360" Value="18x13"/>
            <h1:Field Type="55th" Value="Feinhals, Fritz"/>
        </h1:Block>
    </h1:Document>
    <h1:Document DocKey="obj     40020528">
        <h1:Block Type="obj"
            CreationDate="20.03.2014 09:39:50"
            CreatorID="Admin"
            ChangeDate="21.01.2015 14:40:51"
            ChangerID="Admin"
            OwnerID="Admin"
            FieldsCount="17">
            <h1:Field Type="5000" Value="someValue"/>
            <h1:Field Type="5060" Value="Aufnahme">
                <h1:Field Type="5064" Value="unbekannt"/>
            </h1:Field>
            <h1:Field Type="ob28" Value="Verwalter">
                <h1:Field Type="2864" Value="somePlace"/>
                <h1:Field Type="2900"
                    Value="Theaterwissenschaftliche Sammlung, Universität zu Köln"/>
                <h1:Field Type="2930" Value="Fotoabteilung"/>
                <h1:Field Type="2950" Value="sampleID"/>
                <h1:Field Type="907d" Value="someNumber"/>
            </h1:Field>
            <h1:Field Type="ob26" Value="Aufnahmeort">
                <h1:Field Type="2664" Value="unbekannt"/>
                <h1:Field Type="2690" Value="someEntity"/>
                <h1:Field Type="2700" Value="someEntity"/>
            </h1:Field>
            <h1:Field Type="ob30" Value="Herstellung">
                <h1:Field Type="3100" Value="unbekannt"/>
                <h1:Field Type="3475" Value="Fotograf/in, Atelier"/>
            </h1:Field>
            <h1:Field Type="5007" Value="Darstellung">
                <h1:Field Type="5009" Value="unbekannt"/>
                <h1:Field Type="5010" Value="Autor/in"/>
                <h1:Field Type="5013" Value="xTITELx"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Regie"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Bühnenbild"/>
            </h1:Field>
            <h1:Field Type="5200" Value="someName"/>
            <h1:Field Type="5220" Value="someType"/>
            <h1:Field Type="5230" Value="someFormat"/>
            <h1:Field Type="5240" Value="someSpecification"/>
            <h1:Field Type="5360" Value="someFormat"/>
            <h1:Field Type="55th" Value="someName"/>
        </h1:Block>
    </h1:Document>
</h1:DocumentSet>

基于simpleOne.xml

<?xml version="1.0" encoding="UTF-8"?>
<signaturen>
    <signatur id="TWS_FGL00541">
        <datum/>
        <ort/>
        <ortsteil/>
        <titel>Fritz Feinhals als Wotan</titel>
        <fotograf>Max Muster</fotograf>
        <komponist>Wagner, Richard</komponist>
        <author/>
        <regie/>
        <buehnenbild/>
        <darPerson/>
        <freiText>Walküre</freiText>
    </signatur>
    <signatur id="sampleID">
        <datum/>
        <ort/>
        <ortsteil/>
        <titel>sample title</titel>
        <fotograf>sample photographer</fotograf>
        <komponist>sample composer</komponist>
        <author/>
        <regie/>
        <buehnenbild/>
        <darPerson/>
        <freiText>sample text</freiText>
    </signatur>
</signaturen>

得到如下输出

<?xml version="1.0" encoding="UTF-8"?>
<h1:DocumentSet xmlns:h1="someNamespace">
    <h1:Document DocKey="obj     40020528">
        <h1:Block Type="obj" CreationDate="20.03.2014 09:39:50" CreatorID="Admin" ChangeDate="21.01.2015 14:40:51" ChangerID="Admin" OwnerID="Admin" FieldsCount="17">
            <h1:Field Type="5000" Value="40020528"/>
            <h1:Field Type="5060" Value="Aufnahme">
                <h1:Field Type="5064" Value="unbekannt"/>
            </h1:Field>
            <h1:Field Type="ob28" Value="Verwalter">
                <h1:Field Type="2864" Value="Köln"/>
                <h1:Field Type="2900" Value="Theaterwissenschaftliche Sammlung, Universität zu Köln"/>
                <h1:Field Type="2930" Value="Fotoabteilung"/>
                <h1:Field Type="2950" Value="TWS_FGL00541"/>
                <h1:Field Type="907d" Value="No: 4260"/>
            </h1:Field>
            <h1:Field Type="ob26" Value="Aufnahmeort">
                <h1:Field Type="2664" Value="unbekannt"/>
                <h1:Field Type="2690" Value="Theater"/>
                <h1:Field Type="2700" Value="Theater unbekannt"/>
            </h1:Field>
            <h1:Field Type="ob30" Value="Herstellung">
                <h1:Field Type="3100" Value="unbekannt"/>
                <h1:Field Type="3475" Value="Max Muster"/>
            </h1:Field>
            <h1:Field Type="5007" Value="Darstellung">
                <h1:Field Type="5009" Value="unbekannt"/>
                <h1:Field Type="5010" Value="Autor/in"/>
                <h1:Field Type="5013" Value="xTITELx"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Regie"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Bühnenbild"/>
            </h1:Field>
            <h1:Field Type="5200" Value="Fritz Feinhals"/>
            <h1:Field Type="5220" Value="Fotografie"/>
            <h1:Field Type="5230" Value="Negativ"/>
            <h1:Field Type="5240" Value="Glasplattennegativ"/>
            <h1:Field Type="5360" Value="18x13"/>
            <h1:Field Type="55th" Value="Feinhals, Fritz"/>
        </h1:Block>
    </h1:Document>
    <h1:Document DocKey="obj     40020528">
        <h1:Block Type="obj" CreationDate="20.03.2014 09:39:50" CreatorID="Admin" ChangeDate="21.01.2015 14:40:51" ChangerID="Admin" OwnerID="Admin" FieldsCount="17">
            <h1:Field Type="5000" Value="someValue"/>
            <h1:Field Type="5060" Value="Aufnahme">
                <h1:Field Type="5064" Value="unbekannt"/>
            </h1:Field>
            <h1:Field Type="ob28" Value="Verwalter">
                <h1:Field Type="2864" Value="somePlace"/>
                <h1:Field Type="2900" Value="Theaterwissenschaftliche Sammlung, Universität zu Köln"/>
                <h1:Field Type="2930" Value="Fotoabteilung"/>
                <h1:Field Type="2950" Value="sampleID"/>
                <h1:Field Type="907d" Value="someNumber"/>
            </h1:Field>
            <h1:Field Type="ob26" Value="Aufnahmeort">
                <h1:Field Type="2664" Value="unbekannt"/>
                <h1:Field Type="2690" Value="someEntity"/>
                <h1:Field Type="2700" Value="someEntity"/>
            </h1:Field>
            <h1:Field Type="ob30" Value="Herstellung">
                <h1:Field Type="3100" Value="unbekannt"/>
                <h1:Field Type="3475" Value="sample photographer"/>
            </h1:Field>
            <h1:Field Type="5007" Value="Darstellung">
                <h1:Field Type="5009" Value="unbekannt"/>
                <h1:Field Type="5010" Value="Autor/in"/>
                <h1:Field Type="5013" Value="xTITELx"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Regie"/>
            </h1:Field>
            <h1:Field Type="ob40" Value="Inszenierung">
                <h1:Field Type="4100" Value="unbekannt"/>
                <h1:Field Type="4475" Value="Bühnenbild"/>
            </h1:Field>
            <h1:Field Type="5200" Value="someName"/>
            <h1:Field Type="5220" Value="someType"/>
            <h1:Field Type="5230" Value="someFormat"/>
            <h1:Field Type="5240" Value="someSpecification"/>
            <h1:Field Type="5360" Value="someFormat"/>
            <h1:Field Type="55th" Value="someName"/>
        </h1:Block>
    </h1:Document>
</h1:DocumentSet>

到目前为止,此转换仅处理@Type="3475"的字段,因为我可以很容易地把它映射到新值。思路是为signatur的其他子元素创建类似的模板。

注意:我在h1:DocumentSet中包含了两个h1:Document节点,以防您需要同时处理多个“文档”。同样,我在simpleOne.xml中创建了一个包装元素。如果逐个处理文件,应该很容易将其删除。此外,我使用了通配符(*:)来绕过命名空间,这绝对不是最佳做法,请尽量正确地声明和使用命名空间。