使用XSLT包装相邻的<p>标签

时间:2014-04-28 13:23:20

标签: xml xslt xpath

我正在尝试使用XSLT将HTML转换为XML。

HTML:

<html>
<body>
    <p class="one">Some paragraph 1.</p>
    <p class="one">Some paragraph 2.</p>
    <p class="one">Some paragraph 3.</p>
    <p class="one">Some paragraph 4.</p>
    <p class="one">Some paragraph 5.</p>
    <h3>Some heading</h3>
    <p class="ipsum">Wrapped paragraph 1.</p>
    <p class="ipsum">Wrapped paragraph 2.</p>
    <p class="ipsum">Wrapped paragraph 3.</p>
    <p class="ipsum">Wrapped paragraph 4.</p>
    <p class="ipsum">Wrapped paragraph 5.</p>
    <h3>Some heading</h3>
    <p class="two">Some paragraph 1.</p>
    <p class="two">Some paragraph 2.</p>
    <p class="two">Some paragraph 3.</p>
    <p class="two">Some paragraph 4.</p>
    <p class="two">Some paragraph 5.</p>
</body>
</html>

期望的输出:

<DocumentWrapper>
    <paragraph>Some paragraph 1.</paragraph>
    <paragraph>Some paragraph 2.</paragraph>
    <paragraph>Some paragraph 3.</paragraph>
    <paragraph>Some paragraph 4.</paragraph>
    <paragraph>Some paragraph 5.</paragraph>
    <Heading>Some heading</Heading>
    <WrapperSpecial>
        <SpecialParagraph>Wrapped paragraph 1.</SpecialParagraph>
        <SpecialParagraph>Wrapped paragraph 2.</SpecialParagraph>
        <SpecialParagraph>Wrapped paragraph 3.</SpecialParagraph>
        <SpecialParagraph>Wrapped paragraph 4.</SpecialParagraph>
        <SpecialParagraph>Wrapped paragraph 5.</SpecialParagraph>
    </WrapperSpecial>
    <Heading>Some heading</Heading>
    <paragraph>Some paragraph 1.</paragraph>
    <paragraph>Some paragraph 2.</paragraph>
    <paragraph>Some paragraph 3.</paragraph>
    <paragraph>Some paragraph 4.</paragraph>
    <paragraph>Some paragraph 5.</paragraph>
</DocumentWrapper>

我编写了以下XSLT:

<?xml version="1.0" encoding="UTF-8"?>

<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

    <!-- Entry point: wrap the whole result in DocumentWrapper and dispatch
         every element child of html/body to the rules below. A line feed is
         written just before the closing tag. -->
    <xsl:template match="/">
        <DocumentWrapper>
            <xsl:apply-templates select="html/body/*"/>
            <xsl:text>&#xa;</xsl:text>
        </DocumentWrapper>
    </xsl:template>

    <!-- Paragraphs of class "one" or "two" become <paragraph>, each preceded
         by a line feed. No rule here matches p[@class='ipsum'], so those
         elements fall through to the built-in template rules, which emit
         only their text content. -->
    <xsl:template match="p[@class='one' or @class='two']">
        <xsl:text>&#xa;</xsl:text>
        <paragraph>
            <xsl:value-of select="."/>
        </paragraph>
    </xsl:template>

    <!-- Every h3 becomes <Heading>, preceded by a line feed. -->
    <xsl:template match="h3">
        <xsl:text>&#xa;</xsl:text>
        <Heading>
            <xsl:value-of select="."/>
        </Heading>
    </xsl:template>

</xsl:stylesheet>

除了无法将class为ipsum的<p>标签包装成如下结构之外,它能完成其余所有转换:

<WrapperSpecial>
      <SpecialParagraph>Wrapped paragraph 1.</SpecialParagraph>
      <SpecialParagraph>Wrapped paragraph 2.</SpecialParagraph>
      <SpecialParagraph>Wrapped paragraph 3.</SpecialParagraph>
      <SpecialParagraph>Wrapped paragraph 4.</SpecialParagraph>
      <SpecialParagraph>Wrapped paragraph 5.</SpecialParagraph>
</WrapperSpecial>

我不知道怎么做。

2 个答案:

答案 0 :(得分:2)

对于这类需要对一组相邻兄弟元素做特殊处理的问题,我通常的做法是:最初只对每一组连续元素(run)中的第一个元素应用模板,然后由该元素的模板负责处理该组中其余的元素。

<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

    <xsl:output indent="yes"/>

    <!-- Entry point: hand control to the body element. -->
    <xsl:template match="/">
        <xsl:apply-templates select="html/body"/>
    </xsl:template>

    <!-- Emit the document wrapper and dispatch the children of body.
         A child is skipped only when it is an ipsum paragraph whose
         immediately preceding sibling element is also an ipsum paragraph;
         so every non-ipsum element is processed, and of each run of ipsum
         paragraphs only the first one is processed here. -->
    <xsl:template match="body">
        <DocumentWrapper>
            <xsl:apply-templates
                select="*[not(self::p[@class='ipsum']
                              and preceding-sibling::*[1][self::p[@class='ipsum']])]"/>
        </DocumentWrapper>
    </xsl:template>

    <!-- Headings map one-to-one. -->
    <xsl:template match="h3">
        <Heading>
            <xsl:value-of select="."/>
        </Heading>
    </xsl:template>

    <!-- Generic paragraph rule. The more specific p[@class='ipsum'] rule
         below has a higher default priority, so this one only fires for
         the non-ipsum paragraphs. -->
    <xsl:template match="p">
        <paragraph>
            <xsl:value-of select="."/>
        </paragraph>
    </xsl:template>

    <!-- Reached (in the default mode) only for the first ipsum paragraph of
         a run: open the wrapper and start walking the run in "group" mode. -->
    <xsl:template match="p[@class='ipsum']">
        <WrapperSpecial>
            <xsl:apply-templates select="." mode="group"/>
        </WrapperSpecial>
    </xsl:template>

    <!-- "group" mode: output the current paragraph, then step to the very
         next sibling element, but only if it is another ipsum paragraph.
         When it is not, the selection is empty and the walk stops. -->
    <xsl:template match="p" mode="group">
        <SpecialParagraph>
            <xsl:value-of select="."/>
        </SpecialParagraph>
        <xsl:apply-templates
            select="following-sibling::*[1][self::p][@class='ipsum']"
            mode="group"/>
    </xsl:template>

</xsl:stylesheet>

答案 1 :(得分:0)

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
    <xsl:output method="xml" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- Entry point. The output order is spelled out by hand:
         class "one" paragraphs, first h3, all ipsum paragraphs inside a
         single WrapperSpecial, second h3, class "two" paragraphs.
         NOTE: this is tied to the exact layout of the sample input; a
         document with a different order, more headings, or several separate
         runs of ipsum paragraphs will not be reproduced faithfully. -->
    <xsl:template match="/">
        <DocumentWrapper>
            <xsl:apply-templates select="html/body/p[@class='one']"/>
            <xsl:apply-templates select="html/body/h3[1]"/>
            <WrapperSpecial>
                <xsl:apply-templates select="html/body/p[@class = 'ipsum']"/>
            </WrapperSpecial>
            <xsl:apply-templates select="html/body/h3[2]"/>
            <xsl:apply-templates select="html/body/p[@class='two']"/>
        </DocumentWrapper>
    </xsl:template>

    <!-- Ordinary paragraphs. -->
    <xsl:template match="p[@class='one'] | p[@class = 'two']">
        <paragraph>
            <xsl:apply-templates/>
        </paragraph>
    </xsl:template>

    <!-- Paragraphs that belong inside WrapperSpecial. -->
    <xsl:template match="p[@class='ipsum']">
        <SpecialParagraph>
            <xsl:apply-templates/>
        </SpecialParagraph>
    </xsl:template>

    <!-- Headings. XML names are case-sensitive and the expected output uses
         "Heading" with a capital H, so the element must be spelled that way. -->
    <xsl:template match="h3">
        <Heading>
            <xsl:apply-templates/>
        </Heading>
    </xsl:template>

</xsl:stylesheet>