我正在尝试构建一个XML库文件:比较不同语言的节点集,并把它们合并起来以便日后重用。逻辑应该相当简单:如果某种语言的 tag_XX 属性值序列与另一种语言的 tag_YY 属性值序列完全相同,就可以合并这两组节点。请看下面的XML示例:
<Book>
  <Section>
    <!-- English master: its tag_GB values form the reference sequence for the other locales -->
    <GB>
      <Para tag_GB="L1">
        <Content_GB>string_1</Content_GB>
      </Para>
      <Para tag_GB="Illanc">
        <Content_GB>string_2</Content_GB>
      </Para>
      <Para tag_GB="|PLB">
        <Content_GB>string_3</Content_GB>
      </Para>
      <Para tag_GB="L1">
        <Content_GB>string_4</Content_GB>
      </Para>
      <Para tag_GB="Sub">
        <Content_GB>string_5</Content_GB>
      </Para>
      <Para tag_GB="L3">
        <Content_GB>string_6</Content_GB>
      </Para>
      <Para tag_GB="Subbull">
        <Content_GB>string_7</Content_GB>
      </Para>
    </GB>
    <!-- German locale: tag_DE values follow the same sequence as tag_GB, so it may be merged -->
    <DE>
      <Para tag_DE="L1">
        <Content_DE>German_translation of_string_1</Content_DE>
      </Para>
      <Para tag_DE="Illanc">
        <Content_DE>German_translation of_string_2</Content_DE>
      </Para>
      <Para tag_DE="|PLB">
        <Content_DE>German_translation of_string_3</Content_DE>
      </Para>
      <Para tag_DE="L1">
        <Content_DE>German_translation of_string_4</Content_DE>
      </Para>
      <Para tag_DE="Sub">
        <Content_DE>German_translation of_string_5</Content_DE>
      </Para>
      <Para tag_DE="L3">
        <Content_DE>German_translation of_string_6</Content_DE>
      </Para>
      <Para tag_DE="Subbull">
        <Content_DE>German_translation of_string_7</Content_DE>
      </Para>
    </DE>
    <!-- Danish locale: tag_DK sequence differs from tag_GB (extra L1_sub, different order), so it must be skipped -->
    <DK>
      <Para tag_DK="L1">
        <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
      </Para>
      <Para tag_DK="L1_sub">
        <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
      </Para>
      <Para tag_DK="Illanc">
        <Content_DK>Danish_translation_of_string_2</Content_DK>
      </Para>
      <Para tag_DK="L1">
        <Content_DK>Danish_translation_of_string_4</Content_DK>
      </Para>
      <Para tag_DK="|PLB">
        <Content_DK>Danish_translation_of_string_3</Content_DK>
      </Para>
      <Para tag_DK="L3">
        <Content_DK>Danish_translation_of_string_6</Content_DK>
      </Para>
      <Para tag_DK="Sub">
        <Content_DK>Danish_translation_of_string_5</Content_DK>
      </Para>
      <Para tag_DK="Subbull">
        <Content_DK>Danish_translation_of_string_7</Content_DK>
      </Para>
    </DK>
  </Section>
</Book>
所以:
GB 的 tag_GB 值序列 = L1 -> Illanc -> ... -> Subbull
DE 的 tag_DE 值序列 = L1 -> Illanc -> ... -> Subbull(与GB相同,可以合并)
DK 的 tag_DK 值序列 = L1 -> L1_sub -> 糟糕,这里本应是 Illanc;这说明该序列与GB不同,因此可以忽略这个语言环境
由于德语和英语节点集具有相同的属性序列,我希望将它们合并如下:
<!-- Desired result: one Para per GB entry holding the GB string and the matching DE translation; DK is left out -->
<Book>
  <Dictionary>
    <Para tag="L1">
      <Content_GB>string_1</Content_GB>
      <Content_DE>German_translation of_string_1</Content_DE>
    </Para>
    <Para tag="Illanc">
      <Content_GB>string_2</Content_GB>
      <Content_DE>German_translation of_string_2</Content_DE>
    </Para>
    <Para tag="|PLB">
      <Content_GB>string_3</Content_GB>
      <Content_DE>German_translation of_string_3</Content_DE>
    </Para>
    <Para tag="L1">
      <Content_GB>string_4</Content_GB>
      <Content_DE>German_translation of_string_4</Content_DE>
    </Para>
    <Para tag="Sub">
      <Content_GB>string_5</Content_GB>
      <Content_DE>German_translation of_string_5</Content_DE>
    </Para>
    <Para tag="L3">
      <Content_GB>string_6</Content_GB>
      <Content_DE>German_translation of_string_6</Content_DE>
    </Para>
    <Para tag="Subbull">
      <Content_GB>string_7</Content_GB>
      <Content_DE>German_translation of_string_7</Content_DE>
    </Para>
  </Dictionary>
</Book>
我使用的样式表如下:
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
  Builds one Dictionary per Section: every GB/Para becomes a Para whose tag is
  the GB tag value and whose content is the GB text followed by the content
  found at the same position in each other locale that passes the xsl:if test.
  NOTE(review): that test does not enforce "identical tag sequence"; see the
  comment placed directly above the xsl:if.
-->
<!-- Serialization settings. The xmlns on this element only declares a namespace; it is not an output parameter. -->
<xsl:output method="xml" version="1.0" xmlns="http://www.w3.org/1999/xhtml" encoding="UTF-8" indent="yes"/>
<!-- Second unnamed output declaration. NOTE(review): presumably merged with the one above by the processor; confirm. -->
<xsl:output omit-xml-declaration="yes" indent="yes"/>
<!-- Document node: copy it and process its children. -->
<xsl:template match="/">
<xsl:copy>
<xsl:apply-templates select="@* | node()"/>
</xsl:copy>
</xsl:template>
<!-- Identity rule: copy any attribute or node and recurse into it. -->
<xsl:template match="@* | node()">
<xsl:copy>
<xsl:apply-templates select="@* | node()"/>
</xsl:copy>
</xsl:template>
<!--
  Text nodes: emit their whitespace-normalized value.
  NOTE(review): no priority is given, so this rule ties with the node() rule
  above; which one applies to text nodes depends on the processor's conflict
  handling. Confirm, or add an explicit priority.
-->
<xsl:template match="text()">
<xsl:value-of select="normalize-space(.)"/>
</xsl:template>
<!-- Section: replaced by a Dictionary built from the GB master list. -->
<xsl:template match="Section">
<!-- Reference tag list: the first attribute of every GB/Para (each Para in the sample carries exactly one attribute) -->
<xsl:variable name="Ref_tagList" select="GB/Para/attribute()[1]"/>
<Dictionary>
<xsl:for-each select="GB/Para">
<!-- Index of the current GB Para; used to pick the Para at the same position in other locales -->
<xsl:variable name="pos" select="position()"/>
<Para tag="{@tag_GB}">
<!-- Copy the English master content (first child element of the GB Para) -->
<xsl:apply-templates select="element()[1]"/>
<!-- Absolute path: visits the non-GB children of every Book/Section in the document, not only of the Section being processed -->
<xsl:for-each select="//Book/Section/element()[not(self::GB)]">
<!-- Tag list of the current locale: the first attribute of each of its Para children -->
<xsl:variable name="Curr_tagList" select="Para/attribute()[1]"/>
<!--
  NOTE(review): "=" between two sequences is an XPath general comparison. It is
  true as soon as ANY item of $Ref_tagList equals ANY item of $Curr_tagList,
  not only when both sequences are pairwise identical. DK shares values such
  as "L1" with GB, so the test also succeeds for DK. A whole-sequence check
  (for example comparing string-join results, or deep-equal over the string
  values) is needed to skip a locale whose sequence differs.
-->
<xsl:if test="$Ref_tagList = $Curr_tagList">
<!-- Copy the current locale's content at the same position when the test above succeeds -->
<xsl:apply-templates select="Para[position()=$pos]/element()[1]"/>
</xsl:if>
</xsl:for-each>
</Para>
</xsl:for-each>
</Dictionary>
</xsl:template>
</xsl:stylesheet>
除了可能不是最高效的做法(我在XSLT方面还是新手……)之外,它还不能正常工作。我的思路是:取英语母版的属性序列,如果其他语言环境的属性序列与之相等就复制,否则忽略。但不知为何,属性序列不同的节点集也照样被复制了(如下所示)。有人能告诉我,我的逻辑在哪里与实际行为不符吗?提前谢谢!
当前的输出包含了本应被忽略的丹麦语内容:
<!-- Actual (undesired) result: Content_DK entries are included and are paired with GB entries purely by position -->
<Book>
  <Dictionary>
    <Para tag="L1">
      <Content_GB>string_1</Content_GB>
      <Content_DE>German_translation of_string_1</Content_DE>
      <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="Illanc">
      <Content_GB>string_2</Content_GB>
      <Content_DE>German_translation of_string_2</Content_DE>
      <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="|PLB">
      <Content_GB>string_3</Content_GB>
      <Content_DE>German_translation of_string_3</Content_DE>
      <Content_DK>Danish_translation_of_string_2</Content_DK>
    </Para>
    <Para tag="L1">
      <Content_GB>string_4</Content_GB>
      <Content_DE>German_translation of_string_4</Content_DE>
      <Content_DK>Danish_translation_of_string_4</Content_DK>
    </Para>
    <Para tag="Sub">
      <Content_GB>string_5</Content_GB>
      <Content_DE>German_translation of_string_5</Content_DE>
      <Content_DK>Danish_translation_of_string_3</Content_DK>
    </Para>
    <Para tag="L3">
      <Content_GB>string_6</Content_GB>
      <Content_DE>German_translation of_string_6</Content_DE>
      <Content_DK>Danish_translation_of_string_6</Content_DK>
    </Para>
    <Para tag="Subbull">
      <Content_GB>string_7</Content_GB>
      <Content_DE>German_translation of_string_7</Content_DE>
      <Content_DK>Danish_translation_of_string_5</Content_DK>
    </Para>
  </Dictionary>
</Book>
答案 0 :(得分:1)
这可能不是最好的解决方案。我使用了以下XSLT 2.0功能:string-join(),用它来比较属性序列。或许还有更多XSLT 2.0工具可以解决您的问题,但我认为这里最大的问题在于您的输入文件。
很抱歉,我没有仔细看您现有的转换,而是从头重新实现了一个。希望对您有所帮助:
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Suppress text that is reached through the built-in element rules (e.g. inside DE and DK). -->
  <xsl:template match="text()"/>

  <!--
    GB is the master locale. Emits Book/Dictionary with one Para per GB/Para,
    enriched with the content of every following sibling locale whose joined
    Para attribute values equal those of GB.
  -->
  <xsl:template match="GB">
    <!-- Signature of the master: all Para attribute values joined with '-'. -->
    <xsl:variable name="masterSignature" select="string-join(Para/@*, '-')"/>
    <!-- Temporary tree: one <locale> per matching sibling, holding its Para children in document order. -->
    <xsl:variable name="alignedLocales">
      <xsl:for-each select="following-sibling::*[string-join(Para/@*, '-') = $masterSignature]">
        <locale>
          <xsl:copy-of select="Para/*"/>
        </locale>
      </xsl:for-each>
    </xsl:variable>
    <Book>
      <Dictionary>
        <xsl:apply-templates select="Para">
          <xsl:with-param name="alignedLocales" select="$alignedLocales"/>
        </xsl:apply-templates>
      </Dictionary>
    </Book>
  </xsl:template>

  <!--
    One master Para: writes the GB content followed by the element found at
    the same position inside each matching locale.
    Param alignedLocales: the temporary tree built by the GB template.
  -->
  <xsl:template match="GB/Para">
    <xsl:param name="alignedLocales"/>
    <!-- Position of this Para among the GB Para elements being processed. -->
    <xsl:variable name="slot" select="position()"/>
    <Para tag="{@tag_GB}">
      <xsl:copy-of select="Content_GB"/>
      <xsl:copy-of select="$alignedLocales/locale/*[$slot]"/>
    </Para>
  </xsl:template>
</xsl:stylesheet>
当应用于问题中提供的输入文档时,将生成以下输出:
<!-- Transformation result: each GB entry is paired with its DE translation; no DK content appears -->
<Book>
  <Dictionary>
    <Para tag="L1">
      <Content_GB>string_1</Content_GB>
      <Content_DE>German_translation of_string_1</Content_DE>
    </Para>
    <Para tag="Illanc">
      <Content_GB>string_2</Content_GB>
      <Content_DE>German_translation of_string_2</Content_DE>
    </Para>
    <Para tag="|PLB">
      <Content_GB>string_3</Content_GB>
      <Content_DE>German_translation of_string_3</Content_DE>
    </Para>
    <Para tag="L1">
      <Content_GB>string_4</Content_GB>
      <Content_DE>German_translation of_string_4</Content_DE>
    </Para>
    <Para tag="Sub">
      <Content_GB>string_5</Content_GB>
      <Content_DE>German_translation of_string_5</Content_DE>
    </Para>
    <Para tag="L3">
      <Content_GB>string_6</Content_GB>
      <Content_DE>German_translation of_string_6</Content_DE>
    </Para>
    <Para tag="Subbull">
      <Content_GB>string_7</Content_GB>
      <Content_DE>German_translation of_string_7</Content_DE>
    </Para>
  </Dictionary>
</Book>
答案 1 :(得分:1)
此样式表使用 <xsl:for-each-group>:先按 Para/@* 值的序列对各语言元素进行分组,再按后续兄弟节点(following sibling)的数量对 Para 元素进行分组。我在 @* 的匹配上加了谓词过滤器,确保只比较名称以“tag_”开头的属性。这也许并非必要,但如果以后实例XML中添加了其他属性,它有助于确保样式表仍然有效。
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
  <!-- Single unnamed output definition carrying every serialization setting used by this stylesheet. -->
  <xsl:output method="xml"
              version="1.0"
              encoding="UTF-8"
              indent="yes"
              omit-xml-declaration="yes"/>

  <!-- Identity rule: copy every node and attribute that has no more specific template. -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="node() | @*"/>
    </xsl:copy>
  </xsl:template>

  <!-- Text nodes: emit the whitespace-normalized value; the explicit priority beats the identity rule. -->
  <xsl:template match="text()" priority="1">
    <xsl:value-of select="normalize-space(.)"/>
  </xsl:template>

  <!--
    Section: adjacent locale elements whose tag_* attribute values join to the
    same string share one Dictionary. Inside a Dictionary, Para elements are
    grouped by how many tagged siblings follow them, which lines up the Para
    elements that sit at the same distance from the end of their locale.
  -->
  <xsl:template match="Section">
    <xsl:for-each-group select="*"
                        group-adjacent="string-join(Para/@*[starts-with(local-name(), 'tag_')], '|')">
      <Dictionary>
        <xsl:for-each-group select="current-group()/Para"
                            group-by="count(following-sibling::*[@*[starts-with(local-name(), 'tag_')]])">
          <!-- tag_* attributes of all Para elements in this group; the first one supplies the tag value. -->
          <xsl:variable name="tagAttributes"
                        select="current-group()/@*[starts-with(local-name(), 'tag_')]"/>
          <Para tag="{$tagAttributes[1]}">
            <xsl:copy-of select="current-group()/*"/>
          </Para>
        </xsl:for-each-group>
      </Dictionary>
    </xsl:for-each-group>
  </xsl:template>
</xsl:stylesheet>
当应用于样本输入XML时,产生以下输出:
<!-- Transformation result: the first Dictionary merges GB with DE; the second holds DK on its own -->
<Book>
  <Dictionary>
    <Para tag="L1">
      <Content_GB>string_1</Content_GB>
      <Content_DE>German_translation of_string_1</Content_DE>
    </Para>
    <Para tag="Illanc">
      <Content_GB>string_2</Content_GB>
      <Content_DE>German_translation of_string_2</Content_DE>
    </Para>
    <Para tag="|PLB">
      <Content_GB>string_3</Content_GB>
      <Content_DE>German_translation of_string_3</Content_DE>
    </Para>
    <Para tag="L1">
      <Content_GB>string_4</Content_GB>
      <Content_DE>German_translation of_string_4</Content_DE>
    </Para>
    <Para tag="Sub">
      <Content_GB>string_5</Content_GB>
      <Content_DE>German_translation of_string_5</Content_DE>
    </Para>
    <Para tag="L3">
      <Content_GB>string_6</Content_GB>
      <Content_DE>German_translation of_string_6</Content_DE>
    </Para>
    <Para tag="Subbull">
      <Content_GB>string_7</Content_GB>
      <Content_DE>German_translation of_string_7</Content_DE>
    </Para>
  </Dictionary>
  <Dictionary>
    <Para tag="L1">
      <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="L1_sub">
      <Content_DK>Partial_Danish_translation_of_string_1</Content_DK>
    </Para>
    <Para tag="Illanc">
      <Content_DK>Danish_translation_of_string_2</Content_DK>
    </Para>
    <Para tag="L1">
      <Content_DK>Danish_translation_of_string_4</Content_DK>
    </Para>
    <Para tag="|PLB">
      <Content_DK>Danish_translation_of_string_3</Content_DK>
    </Para>
    <Para tag="L3">
      <Content_DK>Danish_translation_of_string_6</Content_DK>
    </Para>
    <Para tag="Sub">
      <Content_DK>Danish_translation_of_string_5</Content_DK>
    </Para>
    <Para tag="Subbull">
      <Content_DK>Danish_translation_of_string_7</Content_DK>
    </Para>
  </Dictionary>
</Book>