基于属性序列比较2个节点集

时间:2011-07-14 13:56:40

标签: xslt xslt-2.0

我正在尝试构建一种XML库:比较各个节点并将它们组合起来,以便日后重用。逻辑应该相当简单:如果某种语言的tag_XX属性值序列与另一种语言的tag_YY属性值序列相同,就可以将这些节点组合。见下面的XML示例:

<Book>
<Section>
    <GB>
        <Para tag_GB="L1">
            <Content_GB>string_1</Content_GB>
        </Para>
        <Para tag_GB="Illanc">
            <Content_GB>string_2</Content_GB>
        </Para>
        <Para tag_GB="|PLB">
            <Content_GB>string_3</Content_GB>
        </Para>
        <Para tag_GB="L1">
            <Content_GB>string_4</Content_GB>
        </Para>
        <Para tag_GB="Sub">
            <Content_GB>string_5</Content_GB>
        </Para>
        <Para tag_GB="L3">
            <Content_GB>string_6</Content_GB>
        </Para>
        <Para tag_GB="Subbull">
            <Content_GB>string_7</Content_GB>
        </Para>
    </GB>
    <!-- German translations - OK because same attribute sequence -->
    <DE>
        <Para tag_DE="L1">
            <Content_DE>German_translation of_string_1</Content_DE>
        </Para>
        <Para tag_DE="Illanc">
            <Content_DE>German_translation of_string_2</Content_DE>
        </Para>
        <Para tag_DE="|PLB">
            <Content_DE>German_translation of_string_3</Content_DE>
        </Para>
        <Para tag_DE="L1">
            <Content_DE>German_translation of_string_4</Content_DE>
        </Para>
        <Para tag_DE="Sub">
            <Content_DE>German_translation of_string_5</Content_DE>
        </Para>
        <Para tag_DE="L3">
            <Content_DE>German_translation of_string_6</Content_DE>
        </Para>
        <Para tag_DE="Subbull">
            <Content_DE>German_translation of_string_7</Content_DE>
        </Para>
    </DE>
    <!-- Danish translations - NG because not same attribute sequence -->
    <DK>
        <Para tag_DK="L1">
            <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
        </Para>
        <Para tag_DK="L1_sub">
            <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
        </Para>
        <Para tag_DK="Illanc">
            <Content_DK>Danish_translation_of_string_2</Content_DK>
        </Para>
        <Para tag_DK="L1">
            <Content_DK>Danish_translation_of_string_4</Content_DK>
        </Para>
        <Para tag_DK="|PLB">
            <Content_DK>Danish_translation_of_string_3</Content_DK>
        </Para>
        <Para tag_DK="L3">
            <Content_DK>Danish_translation_of_string_6</Content_DK>
        </Para>
        <Para tag_DK="Sub">
            <Content_DK>Danish_translation_of_string_5</Content_DK>
        </Para>
        <Para tag_DK="Subbull">
            <Content_DK>Danish_translation_of_string_7</Content_DK>
        </Para>
    </DK>
</Section>
</Book>

所以

GB tag_GB值序列 = L1 -> Illanc -> ... -> Subbull

DE tag_DE值序列 = L1 -> Illanc -> ... -> Subbull(与GB相同,可以组合)

DK tag_DK值序列 = L1 -> L1_sub -> 哎呀,此处期望的是Illanc,这意味着该序列与GB不同,因此可以忽略该语言环境

由于德语和英语节点集具有相同的属性序列,我希望将它们组合如下:

<Book>
<Dictionary>
    <Para tag="L1">
        <Content_GB>string_1</Content_GB>
        <Content_DE>German_translation of_string_1</Content_DE>
    </Para>
    <Para tag="Illanc">
        <Content_GB>string_2</Content_GB>
        <Content_DE>German_translation of_string_2</Content_DE>
    </Para>
    <Para tag="|PLB">
        <Content_GB>string_3</Content_GB>
        <Content_DE>German_translation of_string_3</Content_DE>
    </Para>
    <Para tag="L1">
        <Content_GB>string_4</Content_GB>
        <Content_DE>German_translation of_string_4</Content_DE>
    </Para>
    <Para tag="Sub">
        <Content_GB>string_5</Content_GB>
        <Content_DE>German_translation of_string_5</Content_DE>
    </Para>
    <Para tag="L3">
        <Content_GB>string_6</Content_GB>
        <Content_DE>German_translation of_string_6</Content_DE>
    </Para>
    <Para tag="Subbull">
        <Content_GB>string_7</Content_GB>
        <Content_DE>German_translation of_string_7</Content_DE>
    </Para>
</Dictionary>
</Book>

我使用的样式表如下:

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- NOTE(review): two unnamed xsl:output declarations; presumably the processor
     merges their attributes into one output definition - confirm. -->
<xsl:output method="xml" version="1.0" xmlns="http://www.w3.org/1999/xhtml" encoding="UTF-8" indent="yes"/>
<xsl:output omit-xml-declaration="yes" indent="yes"/>
<!-- Root template: copies the document node and processes its children. -->
<xsl:template match="/">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>
<!-- Identity template: copies every attribute and node not handled by a more
     specific template, recursing into attributes and children. -->
<xsl:template match="@* | node()">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>
<!-- Text nodes are written out through normalize-space(), i.e. trimmed and with
     internal whitespace runs collapsed.
     NOTE(review): no explicit priority is given; text() and node() have the same
     default priority, so choosing this template over the identity template relies
     on the processor's conflict resolution - confirm. -->
<xsl:template match="text()">
    <xsl:value-of select="normalize-space(.)"/>
</xsl:template>
<!-- Section: intended to emit one Dictionary whose Para elements combine the GB
     content with the content of every other locale that has the same tag sequence. -->
<xsl:template match="Section">
    <!-- store reference tag list -->
    <!-- Sequence of the first attribute of every GB/Para, in document order. -->
    <xsl:variable name="Ref_tagList" select="GB/Para/attribute()[1]"/>
    <Dictionary>
        <xsl:for-each select="GB/Para">
            <!-- Index of the current GB Para; used to pick the Para at the same
                 index in the other locales. -->
            <xsl:variable name="pos" select="position()"/>
            <Para tag="{@tag_GB}">
                <!-- Copy English Master -->
                <xsl:apply-templates select="element()[1]"/>
                <!-- Visits every non-GB child element of Section. The path is absolute
                     (starts at the document root), so it is not limited to the Section
                     currently being processed. -->
                <xsl:for-each select="//Book/Section/element()[not(self::GB)]">
                    <!-- store current locale tag list -->
                    <xsl:variable name="Curr_tagList" select="Para/attribute()[1]"/>
                    <!-- NOTE(review): "=" applied to two sequences is an XPath general
                         comparison. It is true as soon as ANY item of one sequence equals
                         ANY item of the other, so it does not test that the two sequences
                         are identical item by item. Any locale sharing a single tag value
                         with GB therefore passes this test. -->
                    <xsl:if test="$Ref_tagList = $Curr_tagList">
                        <!-- Copy current locale if current tag list equals reference tag list -->
                        <xsl:apply-templates select="Para[position()=$pos]/element()[1]"/>
                    </xsl:if>
                </xsl:for-each>
            </Para>
        </xsl:for-each>
    </Dictionary>
</xsl:template>
</xsl:stylesheet>

除了可能不是最有效的方法(我对XSLT还相当陌生……)之外,它也不能正常工作。我设想的逻辑是:取英语母版的属性序列,如果其他语言环境的属性序列与之相等就复制,否则就忽略。但由于某种原因,具有不同属性序列的节点集也被照样复制了(如下所示)。有人能告诉我,我的逻辑在哪里与实际行为不符吗?提前谢谢!

当前的输出包括应该被忽略的丹麦语......

<Book>
<Dictionary>
    <Para tag="L1">
        <Content_GB>string_1</Content_GB>
        <Content_DE>German_translation of_string_1</Content_DE>
        <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="Illanc">
        <Content_GB>string_2</Content_GB>
        <Content_DE>German_translation of_string_2</Content_DE>
        <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="|PLB">
        <Content_GB>string_3</Content_GB>
        <Content_DE>German_translation of_string_3</Content_DE>
        <Content_DK>Danish_translation_of_string_2</Content_DK>
    </Para>
    <Para tag="L1">
        <Content_GB>string_4</Content_GB>
        <Content_DE>German_translation of_string_4</Content_DE>
        <Content_DK>Danish_translation_of_string_4</Content_DK>
    </Para>
    <Para tag="Sub">
        <Content_GB>string_5</Content_GB>
        <Content_DE>German_translation of_string_5</Content_DE>
        <Content_DK>Danish_translation_of_string_3</Content_DK>
    </Para>
    <Para tag="L3">
        <Content_GB>string_6</Content_GB>
        <Content_DE>German_translation of_string_6</Content_DE>
        <Content_DK>Danish_translation_of_string_6</Content_DK>
    </Para>
    <Para tag="Subbull">
        <Content_GB>string_7</Content_GB>
        <Content_DE>German_translation of_string_7</Content_DE>
        <Content_DK>Danish_translation_of_string_5</Content_DK>
    </Para>
</Dictionary>
</Book>

2 个答案:

答案 0 :(得分:1)

这可能不是最好的解决方案。我使用了以下XSLT 2.0功能:

  • 我使用string-join()比较了属性序列。
  • 我利用了XSLT 2.0中可以把变量内容(临时树,即XSLT 1.0中所说的RTF)直接当作节点来查询的特性

可能有更多XSLT 2.0工具可以解决您的问题。但我认为这里的大问题是你的输入文件。

很抱歉我没有仔细研究你当前的转换,而是直接从头实现了一个。希望对你有帮助:

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- GB is the reference locale. Builds the combined Book/Dictionary from the GB
         paragraphs plus the paragraphs of every following sibling locale whose
         attribute-value sequence is the same as GB's. -->
    <xsl:template match="GB">
        <!-- Signature of the reference locale: all Para attribute values in
             document order, joined with '-'. -->
        <xsl:variable name="refKey" select="string-join(Para/@*, '-')"/>

        <!-- Temporary tree with one <match> per following sibling whose signature
             equals the reference one; each <match> holds copies of that locale's
             Para children in document order. -->
        <xsl:variable name="matches">
            <xsl:for-each select="following-sibling::*[string-join(Para/@*, '-') = $refKey]">
                <match>
                    <xsl:copy-of select="Para/*"/>
                </match>
            </xsl:for-each>
        </xsl:variable>

        <Book>
            <Dictionary>
                <xsl:apply-templates select="Para">
                    <xsl:with-param name="matches" select="$matches"/>
                </xsl:apply-templates>
            </Dictionary>
        </Book>
    </xsl:template>

    <!-- One output Para per GB paragraph: the GB content followed by the content
         found at the same index inside every matching locale. -->
    <xsl:template match="GB/Para">
        <xsl:param name="matches"/>
        <!-- Index of this Para among the GB paragraphs being processed. -->
        <xsl:variable name="idx" select="position()"/>
        <Para tag="{@tag_GB}">
            <xsl:copy-of select="Content_GB, $matches/match/*[$idx]"/>
        </Para>
    </xsl:template>

    <!-- Suppress text that is reached through the built-in templates
         (for example the content of the non-reference locales). -->
    <xsl:template match="text()"/>

</xsl:stylesheet>

当应用于问题中提供的输入文档时,将生成以下输出:

<Book>
   <Dictionary>
      <Para tag="L1">
         <Content_GB>string_1</Content_GB>
         <Content_DE>German_translation of_string_1</Content_DE>
      </Para>
      <Para tag="Illanc">
         <Content_GB>string_2</Content_GB>
         <Content_DE>German_translation of_string_2</Content_DE>
      </Para>
      <Para tag="|PLB">
         <Content_GB>string_3</Content_GB>
         <Content_DE>German_translation of_string_3</Content_DE>
      </Para>
      <Para tag="L1">
         <Content_GB>string_4</Content_GB>
         <Content_DE>German_translation of_string_4</Content_DE>
      </Para>
      <Para tag="Sub">
         <Content_GB>string_5</Content_GB>
         <Content_DE>German_translation of_string_5</Content_DE>
      </Para>
      <Para tag="L3">
         <Content_GB>string_6</Content_GB>
         <Content_DE>German_translation of_string_6</Content_DE>
      </Para>
      <Para tag="Subbull">
         <Content_GB>string_7</Content_GB>
         <Content_DE>German_translation of_string_7</Content_DE>
      </Para>
   </Dictionary>
</Book>

答案 1 :(得分:1)

此样式表使用<xsl:for-each-group>。

  1. 首先,按照Para/@*的值序列对元素进行分组。
  2. 然后,对于每个序列,按照其后带有名称以“tag_”开头的属性的兄弟元素(following-sibling)的数量,对Para元素进行分组。
  3. 我对@*的匹配项加了谓词过滤,以确保只比较名称以“tag_”开头的属性。这可能没有必要,但如果以后在实例XML中添加了其他属性,它有助于确保样式表仍然有效。

    <xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
        <!-- NOTE(review): two unnamed xsl:output declarations; presumably the processor
             merges their attributes into one output definition - confirm. -->
        <xsl:output method="xml" version="1.0" xmlns="http://www.w3.org/1999/xhtml" encoding="UTF-8"
            indent="yes"/>
        <xsl:output omit-xml-declaration="yes" indent="yes"/>
    
        <!-- Identity template: copies every attribute and node that no more specific
             template handles, recursing into attributes and children. -->
        <xsl:template match="@* | node()">
            <xsl:copy>
                <xsl:apply-templates select="@* | node()"/>
            </xsl:copy>
        </xsl:template>
    
        <!-- Text nodes are written through normalize-space(). The explicit priority
             makes this template win over the identity template for text nodes. -->
        <xsl:template match="text()" priority="1">
            <xsl:value-of select="normalize-space(.)"/>
        </xsl:template>
    
        <!-- Section: emits one Dictionary per run of adjacent locale elements that
             share the same tag sequence. -->
        <xsl:template match="Section">
            <!-- Outer grouping. Key = values of all Para attributes whose local name
                 starts with 'tag_', joined with '|'. Consecutive child elements of
                 Section with an equal key form one group.
                 NOTE(review): '|' also occurs inside tag values in the sample input
                 (e.g. '|PLB'), so different sequences could in principle yield the same
                 joined key - confirm whether another separator is needed. -->
            <xsl:for-each-group select="*"
                group-adjacent="string-join(
                Para/@*[starts-with(local-name(),'tag_')],'|')">
                <!-- A Dictionary is written for every group, including a group that
                     consists of a single locale. -->
                <Dictionary>
                    <!-- Inner grouping. Key = number of following siblings that carry
                         a 'tag_' attribute, so Para elements at the same distance from
                         the end of their locale are grouped together. -->
                    <xsl:for-each-group select="current-group()/Para"
                        group-by="count(
                        following-sibling::*[@*[starts-with(local-name(),'tag_')]])">
                        <!-- tag = value of the first 'tag_' attribute found in the group;
                             the children of all Para elements of the group are copied. -->
                        <Para tag="{(current-group()/@*[starts-with(local-name(),'tag_')])[1]}">
                            <xsl:copy-of select="current-group()/*"/>
                        </Para>
                    </xsl:for-each-group>
                </Dictionary>
            </xsl:for-each-group>
        </xsl:template>
    
    </xsl:stylesheet>
    

    当应用于样本输入XML时,产生以下输出:

    <Book>
       <Dictionary>
          <Para tag="L1">
             <Content_GB>string_1</Content_GB>
             <Content_DE>German_translation of_string_1</Content_DE>
          </Para>
          <Para tag="Illanc">
             <Content_GB>string_2</Content_GB>
             <Content_DE>German_translation of_string_2</Content_DE>
          </Para>
          <Para tag="|PLB">
             <Content_GB>string_3</Content_GB>
             <Content_DE>German_translation of_string_3</Content_DE>
          </Para>
          <Para tag="L1">
             <Content_GB>string_4</Content_GB>
             <Content_DE>German_translation of_string_4</Content_DE>
          </Para>
          <Para tag="Sub">
             <Content_GB>string_5</Content_GB>
             <Content_DE>German_translation of_string_5</Content_DE>
          </Para>
          <Para tag="L3">
             <Content_GB>string_6</Content_GB>
             <Content_DE>German_translation of_string_6</Content_DE>
          </Para>
          <Para tag="Subbull">
             <Content_GB>string_7</Content_GB>
             <Content_DE>German_translation of_string_7</Content_DE>
          </Para>
       </Dictionary>
       <Dictionary>
          <Para tag="L1">
             <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
          </Para>
          <Para tag="L1_sub">
             <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
          </Para>
          <Para tag="Illanc">
             <Content_DK>Danish_translation_of_string_2</Content_DK>
          </Para>
          <Para tag="L1">
             <Content_DK>Danish_translation_of_string_4</Content_DK>
          </Para>
          <Para tag="|PLB">
             <Content_DK>Danish_translation_of_string_3</Content_DK>
          </Para>
          <Para tag="L3">
             <Content_DK>Danish_translation_of_string_6</Content_DK>
          </Para>
          <Para tag="Sub">
             <Content_DK>Danish_translation_of_string_5</Content_DK>
          </Para>
          <Para tag="Subbull">
             <Content_DK>Danish_translation_of_string_7</Content_DK>
          </Para>
       </Dictionary>
    </Book>