如何基于 following-sibling / preceding-sibling 的 text() 节点谓词,为结构不良的内容重建结构?

时间:2013-05-10 20:54:32

标签: xml xslt xpath

根据下面的源XML,我想捕获不包含“NOTE#:”的文本节点和<emphasis>元素,并将它们与各自对应的<emphasis bold="yes">NOTE#:</emphasis>元素放在一起。

源XML:

<section>
  <para>
    <emphasis bold="yes">NOTE1:</emphasis> This is the text of the first note 1 <emphasis bold="yes">that should only be in the <emphasis italic="yes">first</emphasis> subsection occurance of note one.</emphasis>. This is the second sentence of the first note one. <emphasis italic="yes">Here is some other text</emphasis> that can appear. <emphasis bold="yes">Marvin Gaye is an excellent musician1.</emphasis> Play it for your girlfriend1 <emphasis italic="yes">now1.</emphasis>.
    <emphasis bold="yes">NOTE2:</emphasis> This is the text of  the first note two2.1 <emphasis italic="yes">The Isley Brothers are also good.2.1</emphasis>
    <emphasis bold="yes">NOTE1:</emphasis> This is the text of the second note one.1.2 <emphasis italic="yes">My girlfriend loves them1.2</emphasis>
    <emphasis bold="yes">NOTE3:</emphasis> This is the text of the first note three3.1.
    <emphasis bold="yes">NOTE1:</emphasis> This is the text of the third note one.1.3<emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis>
    <emphasis bold="yes">NOTE3:</emphasis> This is the text of the second note three.3.2<emphasis italic="yes">Steak and potatos3.2</emphasis>
    <emphasis bold="yes">NOTE2:</emphasis> This is the text of the second note two.2.2<emphasis italic="yes">And then some wine2.2</emphasis>
  </para>
</section>

当前的XSLT:

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!--
    Stylesheet from the question. It renames section to root and tries to
    wrap every bold NOTEn: marker under para, together with the content that
    follows it, in its own wrapper element by filtering on the
    following-sibling / preceding-sibling axes.
  -->

  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

  <!--
    Default rule: shallow-copies any element, attribute or text node and
    recurses into its element, attribute and text children.
  -->
  <xsl:template match="*|@*|text()">
    <xsl:copy>
      <xsl:apply-templates select="*|@*|text()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Replaces the section element with a literal root element and processes its content. -->
  <xsl:template match="section">
    <root>
      <xsl:apply-templates select="*|@*|text()"/>
    </root>
  </xsl:template>

  <!--
    Empty templates: drop every emphasis child and every text child of para
    that has an earlier sibling emphasis with bold='yes' and the text
    'NOTE1:', 'NOTE2:' or 'NOTE3:'. The intent appears to be that such nodes
    are emitted only through the xsl:copy-of calls in the marker templates
    below.
    NOTE(review): every marker after the first is itself an emphasis with an
    earlier marker sibling, so it matches one of these patterns as well as
    one of the marker templates; which rule is applied then depends on the
    processor's template conflict resolution.
  -->
  <xsl:template match="para/emphasis[preceding-sibling::emphasis[@bold='yes' and text()='NOTE1:']]"/>
  <xsl:template match="para/emphasis[preceding-sibling::emphasis[@bold='yes' and text()='NOTE2:']]"/>
  <xsl:template match="para/emphasis[preceding-sibling::emphasis[@bold='yes' and text()='NOTE3:']]"/>
  <xsl:template match="para/text()[preceding-sibling::emphasis[@bold='yes' and text()='NOTE1:']]"/>
  <xsl:template match="para/text()[preceding-sibling::emphasis[@bold='yes' and text()='NOTE2:']]"/>
  <xsl:template match="para/text()[preceding-sibling::emphasis[@bold='yes' and text()='NOTE3:']]"/>

  <!--
    Bold NOTE1: marker that is not the last emphasis child of its para.
    Emits subsection/para holding a literal marker plus copies of
      (a) following-sibling text nodes that have no NOTE2: or NOTE3: marker
          among their preceding siblings, and
      (b) every following-sibling emphasis whose string value contains none
          of the three marker strings.
    Neither branch stops at the next marker: (b) runs to the end of the
    para, and (a) is empty as soon as any NOTE2:/NOTE3: marker precedes the
    text, even when the text belongs to a later NOTE1: marker.
  -->
  <xsl:template match="para/emphasis[@bold='yes' and text()='NOTE1:' and not(position()=last())]">
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis>
        <xsl:copy-of
          select="following-sibling::text()[not(preceding-sibling::emphasis[@bold='yes' and text()='NOTE2:'])  and not(preceding-sibling::emphasis[@bold='yes' and text()='NOTE3:'])] |following-sibling::emphasis[not(contains(string(), 'NOTE1:'))  and  not(contains(string(), 'NOTE2:'))  and not(contains(string(), 'NOTE3:'))]"
        />
      </para>
    </subsection>
  </xsl:template>

  <!--
    Bold NOTE1: marker that is the last of two or more emphasis children.
    Copies the first following text node. Because the marker is the last
    emphasis child, the following-sibling::emphasis branch selects nothing.
  -->
  <xsl:template match="para/emphasis[position()=last() and position() > 1  and @bold='yes' and text()='NOTE1:']">
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis>
        <xsl:copy-of
          select="following-sibling::text()[position() = 1]|following-sibling::emphasis[not(contains(string(), 'NOTE1:'))  and  not(contains(string(), 'NOTE2:'))  and not(contains(string(), 'NOTE3:'))]"
        />
      </para>
    </subsection>
  </xsl:template>

  <!--
    Bold NOTE2: marker that is not the last emphasis child. Same shape as the
    NOTE1: rule, with the text filter excluding text that follows any NOTE1:
    or NOTE3: marker.
  -->
  <xsl:template match="para/emphasis[@bold='yes' and text()='NOTE2:' and not(position()=last())]">
    <subsection>
      <para>
        <emphasis bold="yes">NOTE2:</emphasis>
        <xsl:copy-of
          select="following-sibling::text()[not(preceding-sibling::emphasis[@bold='yes' and text()='NOTE1:'])  and not(preceding-sibling::emphasis[@bold='yes' and text()='NOTE3:'])] |following-sibling::emphasis[not(contains(string(), 'NOTE1:'))  and  not(contains(string(), 'NOTE2:'))  and not(contains(string(), 'NOTE3:'))]"
        />
      </para>
    </subsection>
  </xsl:template>

  <!--
    Bold NOTE2: marker that is the last of two or more emphasis children.
    Unlike the non-last NOTE2: rule, the wrapper element here is note, not
    subsection.
  -->
  <xsl:template match="para/emphasis[position()=last() and position() > 1 and @bold='yes' and text()='NOTE2:']">
    <note>
      <para>
        <emphasis bold="yes">NOTE2:</emphasis>
        <xsl:copy-of
          select="following-sibling::text()[position() = 1]|following-sibling::emphasis[not(contains(string(), 'NOTE1:'))  and  not(contains(string(), 'NOTE2:')) and not(contains(string(), 'NOTE3:'))  ]"
        />
      </para>
    </note>
  </xsl:template>

  <!--
    Bold NOTE3: marker that is not the last emphasis child. Same selection
    logic as the NOTE1:/NOTE2: rules (text filter excludes text after any
    NOTE2: or NOTE1: marker), but the wrapper element is note.
  -->
  <xsl:template match="para/emphasis[@bold='yes' and text()='NOTE3:' and not(position()=last())]">
    <note>
      <para>
        <emphasis bold="yes">NOTE3:</emphasis>
        <xsl:copy-of
          select="following-sibling::text()[not(preceding-sibling::emphasis[@bold='yes' and text()='NOTE2:'])  and not(preceding-sibling::emphasis[@bold='yes' and text()='NOTE1:'])] | following-sibling::emphasis[not(contains(string(), 'NOTE1:'))  and  not(contains(string(), 'NOTE2:'))  and not(contains(string(), 'NOTE3:'))]"
        />
      </para>
    </note>
  </xsl:template>

  <!--
    Bold NOTE3: marker that is the last of two or more emphasis children.
    Copies the first following text node inside a note wrapper; the
    following-sibling::emphasis branch cannot select anything here.
  -->
  <xsl:template match="para/emphasis[position()=last() and position() > 1 and @bold='yes' and text()='NOTE3:']">
    <note>
      <para>
        <emphasis bold="yes">NOTE3:</emphasis>
        <xsl:copy-of select="following-sibling::text()[position() = 1]|following-sibling::emphasis[not(contains(string(), 'NOTE1:'))  and  not(contains(string(), 'NOTE2:'))  and not(contains(string(), 'NOTE3:'))  ]"/>
      </para>
    </note>
  </xsl:template>

</xsl:stylesheet>

当前输出:

<root>
  <para>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis> This is the text of the first note 1 <emphasis bold="yes">that should only be in the <emphasis italic="yes">first</emphasis> subsection occurance of note one.</emphasis>. This is the second sentence of the first note one. <emphasis italic="yes">Here is some other text</emphasis> that can appear. <emphasis bold="yes">Marvin Gaye is an excellent musician1.</emphasis> Play it for your girlfriend1 <emphasis italic="yes">now1.</emphasis>.
        <emphasis italic="yes">The Isley Brothers are also good.2.1</emphasis>
        <emphasis italic="yes">My girlfriend loves them1.2</emphasis>
        <emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis>
        <emphasis italic="yes">Steak and potatos3.2</emphasis>
        <emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE2:</emphasis>
        <emphasis italic="yes">The Isley Brothers are also good.2.1</emphasis>
        <emphasis italic="yes">My girlfriend loves them1.2</emphasis>
        <emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis>
        <emphasis italic="yes">Steak and potatos3.2</emphasis>
        <emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis>
        <emphasis italic="yes">My girlfriend loves them1.2</emphasis>
        <emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis>
        <emphasis italic="yes">Steak and potatos3.2</emphasis>
        <emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </subsection>
    <note>
      <para>
        <emphasis bold="yes">NOTE3:</emphasis>
        <emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis>
        <emphasis italic="yes">Steak and potatos3.2</emphasis>
        <emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </note>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis>
        <emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis>
        <emphasis italic="yes">Steak and potatos3.2</emphasis>
        <emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </subsection>
    <note>
      <para>
        <emphasis bold="yes">NOTE3:</emphasis>
        <emphasis italic="yes">Steak and potatos3.2</emphasis>
        <emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </note>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE2:</emphasis>
        <emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </subsection>
  </para>
</root>


期望输出:

<root>
  <para>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis> This is the text of the first note 1 <emphasis bold="yes">that should only be in the <emphasis italic="yes">first</emphasis> subsection occurance of note one.</emphasis>. This is the second sentence of the first note one. <emphasis italic="yes">Here is some other text</emphasis> that can appear. <emphasis bold="yes">Marvin Gaye is an excellent musician1.</emphasis> Play it for your girlfriend1 <emphasis italic="yes">now1.</emphasis>.
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE2:</emphasis>This is the text of  the first note two2.1<emphasis italic="yes">The Isley Brothers are also good.2.1</emphasis>
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis> This is the text of the second note one.1.2 <emphasis italic="yes">My girlfriend loves them1.2</emphasis>
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE3:</emphasis> This is the text of the first note three3.1.
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE1:</emphasis> This is the text of the third note one.1.3<emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis>
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE3:</emphasis> This is the text of the second note three.3.2<emphasis italic="yes">Steak and potatos3.2</emphasis>
      </para>
    </subsection>
    <subsection>
      <para>
        <emphasis bold="yes">NOTE2:</emphasis> This is the text of the second note two.2.2<emphasis italic="yes">And then some wine2.2</emphasis>
      </para>
    </subsection>
  </para>
</root>

1 个答案:

答案 0 :(得分:2)

如果我正确理解了您的要求,我认为您的XSLT可能有些过于复杂。您似乎只是想对节点进行分组,每个组以一个文本以“NOTE”开头的emphasis元素作为起点。

假设您使用的是XSLT 2.0(因为您当前的XSLT标注了 version="2.0"),那么可以使用非常实用的 xsl:for-each-group 指令,它应该正是您所需要的。就您的情况而言,需要对节点进行分组,每个组以相应的emphasis元素开头:

<xsl:for-each-group 
    select="node()" 
    group-starting-with="emphasis[starts-with(text(), 'NOTE')]">

在该指令内部,先输出 subsection 和 para 元素,然后对组中的节点应用模板以复制它们:

<xsl:apply-templates select="current-group()" />

emphasis元素本身则可以交给XSLT恒等转换(identity transform)模板来输出。

下面是一个简洁得多的XSLT:

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
   <!--
     Groups the children of every para into one subsection/para pair per
     NOTE marker, using XSLT 2.0 positional grouping.
     Everything else, including the section root, is copied unchanged by the
     identity rule at the bottom; add a dedicated template for section if a
     different root element name is required.
   -->
   <xsl:output method="xml" indent="yes"/>

   <!--
     Removes whitespace-only text nodes from the input. Without this, the
     indentation before the first marker would be a node of its own and
     would form an extra leading group ahead of the first NOTE marker.
   -->
   <xsl:strip-space elements="*"/>

   <!--
     Starts a new group at each emphasis child whose first text child begins
     with 'NOTE'; all nodes up to the next such marker belong to that group.
     text()[1] is used rather than a bare text(): starts-with() accepts at
     most one item as its first argument in XPath 2.0, and an emphasis with
     mixed content has several text children, which would make the bare
     form a type error that a processor may report instead of treating it as
     a non-match.
   -->
   <xsl:template match="para">
      <para>
         <xsl:for-each-group select="node()" group-starting-with="emphasis[starts-with(text()[1], 'NOTE')]">
            <subsection>
               <para>
                  <!-- Hand each member of the group to the identity rule. -->
                  <xsl:apply-templates select="current-group()" />
               </para>
            </subsection>
         </xsl:for-each-group>
      </para>
   </xsl:template>

   <!-- Identity rule: copies any node or attribute and recurses into it. -->
   <xsl:template match="@*|node()">
      <xsl:copy>
         <xsl:apply-templates select="@*|node()"/>
      </xsl:copy>
   </xsl:template>
</xsl:stylesheet>

应用于XML时,输出以下内容

<section>
   <para>
      <subsection>
         <para>
            <emphasis bold="yes">NOTE1:</emphasis> This is the text of the first note 1 
            <emphasis bold="yes">that should only be in the 
               <emphasis italic="yes">first</emphasis>subsection occurance of note one. 
            </emphasis>. This is the second sentence of the first note one. 
            <emphasis italic="yes">Here is some other text</emphasis>that can appear. 
            <emphasis bold="yes">Marvin Gaye is an excellent musician1.</emphasis>Play it for your girlfriend1 
            <emphasis italic="yes">now1.</emphasis>. 
         </para>
      </subsection>
      <subsection>
         <para>
            <emphasis bold="yes">NOTE2:</emphasis>This is the text of the first note two2.1 
            <emphasis italic="yes">The Isley Brothers are also good.2.1</emphasis></para>
      </subsection>
      <subsection>
         <para>
            <emphasis bold="yes">NOTE1:</emphasis>This is the text of the second note one.1.2 
            <emphasis italic="yes">My girlfriend loves them1.2</emphasis></para>
      </subsection>
      <subsection>
         <para>
            <emphasis bold="yes">NOTE3:</emphasis>This is the text of the first note three3.1. </para>
      </subsection>
      <subsection>
         <para>
            <emphasis bold="yes">NOTE1:</emphasis>This is the text of the third note one.1.3 
            <emphasis italic="yes">She is going to make me dinner tonight1.3</emphasis></para>
      </subsection>
      <subsection>
         <para>
            <emphasis bold="yes">NOTE3:</emphasis> This is the text of the second note three.3.2 
            <emphasis italic="yes">Steak and potatos3.2</emphasis></para>
      </subsection>
      <subsection>
         <para>
            <emphasis bold="yes">NOTE2:</emphasis> This is the text of the second note two.2.2 
            <emphasis italic="yes">And then some wine2.2</emphasis></para>
      </subsection>
   </para>
</section>

有关如何在XSLT 2.0中进行分组的更多示例,请参阅http://www.xml.com/lpt/a/1314