Question

我正在尝试构建一种库XML，比较各种节点并将它们组合起来以便以后重用。逻辑应该相当简单，如果给定语言的tag_XX属性值序列等于另一种语言的tag_YY属性值序列，则可以组合节点。见下面的XML示例

<Book>
<Section>
    <GB>
        <Para tag_GB="L1">
            <Content_GB>string_1</Content_GB>
        </Para>
        <Para tag_GB="Illanc">
            <Content_GB>string_2</Content_GB>
        </Para>
        <Para tag_GB="|PLB">
            <Content_GB>string_3</Content_GB>
        </Para>
        <Para tag_GB="L1">
            <Content_GB>string_4</Content_GB>
        </Para>
        <Para tag_GB="Sub">
            <Content_GB>string_5</Content_GB>
        </Para>
        <Para tag_GB="L3">
            <Content_GB>string_6</Content_GB>
        </Para>
        <Para tag_GB="Subbull">
            <Content_GB>string_7</Content_GB>
        </Para>
    </GB>
    <!-- German translations - OK because same attribute sequence -->
    <DE>
        <Para tag_DE="L1">
            <Content_DE>German_translation of_string_1</Content_DE>
        </Para>
        <Para tag_DE="Illanc">
            <Content_DE>German_translation of_string_2</Content_DE>
        </Para>
        <Para tag_DE="|PLB">
            <Content_DE>German_translation of_string_3</Content_DE>
        </Para>
        <Para tag_DE="L1">
            <Content_DE>German_translation of_string_4</Content_DE>
        </Para>
        <Para tag_DE="Sub">
            <Content_DE>German_translation of_string_5</Content_DE>
        </Para>
        <Para tag_DE="L3">
            <Content_DE>German_translation of_string_6</Content_DE>
        </Para>
        <Para tag_DE="Subbull">
            <Content_DE>German_translation of_string_7</Content_DE>
        </Para>
    </DE>
    <!-- Danish translations - NG because not same attribute sequence -->
    <DK>
        <Para tag_DK="L1">
            <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
        </Para>
        <Para tag_DK="L1_sub">
            <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
        </Para>
        <Para tag_DK="Illanc">
            <Content_DK>Danish_translation_of_string_2</Content_DK>
        </Para>
        <Para tag_DK="L1">
            <Content_DK>Danish_translation_of_string_4</Content_DK>
        </Para>
        <Para tag_DK="|PLB">
            <Content_DK>Danish_translation_of_string_3</Content_DK>
        </Para>
        <Para tag_DK="L3">
            <Content_DK>Danish_translation_of_string_6</Content_DK>
        </Para>
        <Para tag_DK="Sub">
            <Content_DK>Danish_translation_of_string_5</Content_DK>
        </Para>
        <Para tag_DK="Subbull">
            <Content_DK>Danish_translation_of_string_7</Content_DK>
        </Para>
    </DK>
</Section>
</Book>

所以

GB tag_GB值序列= L1 - ＆gt; Illanc - ＆gt; ... - ＆gt; SubBul

DE tag_DE值序列= L1 - ＆gt; Illanc - ＆gt; ... - ＆gt; SubBul（和GB一样好）

DK tag_DK值序列= L1 - ＆gt; L1->哎呀，期望Illanc意味着这个序列与GB不同，并且可以忽略语言环境

由于德语和英语节点集具有相同的属性序列，我喜欢将它们组合如下：

<Book>
<Dictionary>
    <Para tag="L1">
        <Content_GB>string_1</Content_GB>
        <Content_DE>German_translation of_string_1</Content_DE>
    </Para>
    <Para tag="Illanc">
        <Content_GB>string_2</Content_GB>
        <Content_DE>German_translation of_string_2</Content_DE>
    </Para>
    <Para tag="|PLB">
        <Content_GB>string_3</Content_GB>
        <Content_DE>German_translation of_string_3</Content_DE>
    </Para>
    <Para tag="L1">
        <Content_GB>string_4</Content_GB>
        <Content_DE>German_translation of_string_4</Content_DE>
    </Para>
    <Para tag="Sub">
        <Content_GB>string_5</Content_GB>
        <Content_DE>German_translation of_string_5</Content_DE>
    </Para>
    <Para tag="L3">
        <Content_GB>string_6</Content_GB>
        <Content_DE>German_translation of_string_6</Content_DE>
    </Para>
    <Para tag="Subbull">
        <Content_GB>string_7</Content_GB>
        <Content_DE>German_translation of_string_7</Content_DE>
    </Para>
</Dictionary>
</Book>

我使用的样式表如下：

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" xmlns="http://www.w3.org/1999/xhtml" encoding="UTF-8" indent="yes"/>
<xsl:output omit-xml-declaration="yes" indent="yes"/>
<xsl:template match="/">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>
<xsl:template match="@* | node()">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>
<xsl:template match="text()">
    <xsl:value-of select="normalize-space(.)"/>
</xsl:template>
<xsl:template match="Section">
    <!-- store reference tag list -->
    <xsl:variable name="Ref_tagList" select="GB/Para/attribute()[1]"/>
    <Dictionary>
        <xsl:for-each select="GB/Para">
            <xsl:variable name="pos" select="position()"/>
            <Para tag="{@tag_GB}">
                <!-- Copy English Master -->
                <xsl:apply-templates select="element()[1]"/>
                <xsl:for-each select="//Book/Section/element()[not(self::GB)]">
                    <!-- store current locale tag list -->
                    <xsl:variable name="Curr_tagList" select="Para/attribute()[1]"/>
                    <xsl:if test="$Ref_tagList = $Curr_tagList">
                        <!-- Copy current locale is current tag list equals reference tag list -->
                        <xsl:apply-templates select="Para[position()=$pos]/element()[1]"/>
                    </xsl:if>
                </xsl:for-each>
            </Para>
        </xsl:for-each>
    </Dictionary>
</xsl:template>
</xsl:stylesheet>

除了可能不是最有效的方法（我对xslt游戏相当新...）它也不起作用。我想到的逻辑是采用英语母版的属性集，如果任何其他语言环境的属性集相等，我复制，如果不是我忽略。但由于某种原因，还可以快速复制具有不同属性序列的节点集（如下所示）。有人可以告诉我，我的逻辑与现实冲突吗？提前谢谢！

当前的输出包括应该被忽略的丹麦语......

<Book>
<Dictionary>
    <Para tag="L1">
        <Content_GB>string_1</Content_GB>
        <Content_DE>German_translation of_string_1</Content_DE>
        <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="Illanc">
        <Content_GB>string_2</Content_GB>
        <Content_DE>German_translation of_string_2</Content_DE>
        <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="|PLB">
        <Content_GB>string_3</Content_GB>
        <Content_DE>German_translation of_string_3</Content_DE>
        <Content_DK>Danish_translation_of_string_2</Content_DK>
    </Para>
    <Para tag="L1">
        <Content_GB>string_4</Content_GB>
        <Content_DE>German_translation of_string_4</Content_DE>
        <Content_DK>Danish_translation_of_string_4</Content_DK>
    </Para>
    <Para tag="Sub">
        <Content_GB>string_5</Content_GB>
        <Content_DE>German_translation of_string_5</Content_DE>
        <Content_DK>Danish_translation_of_string_3</Content_DK>
    </Para>
    <Para tag="L3">
        <Content_GB>string_6</Content_GB>
        <Content_DE>German_translation of_string_6</Content_DE>
        <Content_DK>Danish_translation_of_string_6</Content_DK>
    </Para>
    <Para tag="Subbull">
        <Content_GB>string_7</Content_GB>
        <Content_DE>German_translation of_string_7</Content_DE>
        <Content_DK>Danish_translation_of_string_5</Content_DK>
    </Para>
</Dictionary>
</Book>

Answer 1

这可能不是最好的解决方案。我使用了以下XSLT 2.0功能：

我使用string-join()比较了属性序列。
我利用了使用RTF变量的可能性

可能有更多XSLT 2.0工具可以解决您的问题。但我认为这里的大问题是你的输入文件。

我很抱歉没看到你当前的变换。刚从头开始实施。希望它有所帮助：

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output indent="yes"/>
    <xsl:strip-space elements="*"/>

    <xsl:template match="GB">
        <Book>
            <Dictionary>

                <xsl:variable name="matches">
                    <xsl:for-each select="following-sibling::*
                        [string-join(Para/@*,'-')
                        = string-join(current()/Para/@*,'-')]">
                        <match><xsl:copy-of select="Para/*"/></match>
                    </xsl:for-each>
                </xsl:variable>

                <xsl:apply-templates select="Para">
                    <xsl:with-param name="matches" select="$matches"/>
                </xsl:apply-templates>

            </Dictionary>
        </Book>
    </xsl:template>

    <xsl:template match="Para[parent::GB]">
        <xsl:param name="matches"/>
        <xsl:variable name="pos" select="position()"/>
        <Para tag="{@tag_GB}">
            <xsl:copy-of select="Content_GB"/>
            <xsl:copy-of select="$matches/match/*[position()=$pos]"/>
        </Para>
    </xsl:template>

    <xsl:template match="text()"/>

</xsl:stylesheet>

当应用于问题中提供的输入文档时，将生成以下输出：

<Book>
   <Dictionary>
      <Para tag="L1">
         <Content_GB>string_1</Content_GB>
         <Content_DE>German_translation of_string_1</Content_DE>
      </Para>
      <Para tag="Illanc">
         <Content_GB>string_2</Content_GB>
         <Content_DE>German_translation of_string_2</Content_DE>
      </Para>
      <Para tag="|PLB">
         <Content_GB>string_3</Content_GB>
         <Content_DE>German_translation of_string_3</Content_DE>
      </Para>
      <Para tag="L1">
         <Content_GB>string_4</Content_GB>
         <Content_DE>German_translation of_string_4</Content_DE>
      </Para>
      <Para tag="Sub">
         <Content_GB>string_5</Content_GB>
         <Content_DE>German_translation of_string_5</Content_DE>
      </Para>
      <Para tag="L3">
         <Content_GB>string_6</Content_GB>
         <Content_DE>German_translation of_string_6</Content_DE>
      </Para>
      <Para tag="Subbull">
         <Content_GB>string_7</Content_GB>
         <Content_DE>German_translation of_string_7</Content_DE>
      </Para>
   </Dictionary>
</Book>

Answer 2

此样式表使用<xsl:for-each-group>

首先，按照Para/@*值
然后，对于每个序列，使用具有以“tag”开头的属性的Para元素的数量对following sibling进行分组。

我对@*的匹配项有谓词过滤器，以确保它比较以“tag_”开头的过滤器。这可能没有必要，但如果将其他属性添加到实例XML中，将有助于确保它仍然有效。

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="xml" version="1.0" xmlns="http://www.w3.org/1999/xhtml" encoding="UTF-8"
        indent="yes"/>
    <xsl:output omit-xml-declaration="yes" indent="yes"/>

    <xsl:template match="@* | node()">
        <xsl:copy>
            <xsl:apply-templates select="@* | node()"/>
        </xsl:copy>
    </xsl:template>

    <xsl:template match="text()" priority="1">
        <xsl:value-of select="normalize-space(.)"/>
    </xsl:template>

    <xsl:template match="Section">
        <xsl:for-each-group select="*"
            group-adjacent="string-join(
            Para/@*[starts-with(local-name(),'tag_')],'|')">
            <Dictionary>
                <xsl:for-each-group select="current-group()/Para"
                    group-by="count(
                    following-sibling::*[@*[starts-with(local-name(),'tag_')]])">
                    <Para tag="{(current-group()/@*[starts-with(local-name(),'tag_')])[1]}">
                        <xsl:copy-of select="current-group()/*"/>
                    </Para>
                </xsl:for-each-group>
            </Dictionary>
        </xsl:for-each-group>
    </xsl:template>

</xsl:stylesheet>

当应用于样本输入XML时，产生以下输出：

<Book>
   <Dictionary>
      <Para tag="L1">
         <Content_GB>string_1</Content_GB>
         <Content_DE>German_translation of_string_1</Content_DE>
      </Para>
      <Para tag="Illanc">
         <Content_GB>string_2</Content_GB>
         <Content_DE>German_translation of_string_2</Content_DE>
      </Para>
      <Para tag="|PLB">
         <Content_GB>string_3</Content_GB>
         <Content_DE>German_translation of_string_3</Content_DE>
      </Para>
      <Para tag="L1">
         <Content_GB>string_4</Content_GB>
         <Content_DE>German_translation of_string_4</Content_DE>
      </Para>
      <Para tag="Sub">
         <Content_GB>string_5</Content_GB>
         <Content_DE>German_translation of_string_5</Content_DE>
      </Para>
      <Para tag="L3">
         <Content_GB>string_6</Content_GB>
         <Content_DE>German_translation of_string_6</Content_DE>
      </Para>
      <Para tag="Subbull">
         <Content_GB>string_7</Content_GB>
         <Content_DE>German_translation of_string_7</Content_DE>
      </Para>
   </Dictionary>
   <Dictionary>
      <Para tag="L1">
         <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
      </Para>
      <Para tag="L1_sub">
         <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
      </Para>
      <Para tag="Illanc">
         <Content_DK>Danish_translation_of_string_2</Content_DK>
      </Para>
      <Para tag="L1">
         <Content_DK>Danish_translation_of_string_4</Content_DK>
      </Para>
      <Para tag="|PLB">
         <Content_DK>Danish_translation_of_string_3</Content_DK>
      </Para>
      <Para tag="L3">
         <Content_DK>Danish_translation_of_string_6</Content_DK>
      </Para>
      <Para tag="Sub">
         <Content_DK>Danish_translation_of_string_5</Content_DK>
      </Para>
      <Para tag="Subbull">
         <Content_DK>Danish_translation_of_string_7</Content_DK>
      </Para>
   </Dictionary>
</Book>

基于属性序列比较2个节点集

2 个答案: