XSLT和复杂Xpath

时间:2014-12-04 09:50:41

标签: xslt xpath

这是待转换的 HTML5 文档:

<!DOCTYPE html>
<html>
    <head>
    </head>
    <body>
        <!-- Complex Table: the Title cell ("STEM") spans all six data rows, so it
             appears only in the first data row. The Volume cell is therefore td[2]
             in that row and td[1] in the rows that follow. -->
        <table border="1">
            <caption>Complex Table</caption>
            <tbody>
                <tr>
                    <th>Title</th>
                    <th>Volume</th>
                    <th>Chapter</th>
                    <th>Stds.</th>
                    <th>Dept.</th>
                </tr>
                <tr>
                    <td rowspan="6">STEM</td>
                    <td rowspan="1">1</td>
                    <td rowspan="2">1</td>
                    <td>1 to 10</td>
                    <td rowspan="2">Biology</td>
                </tr>
                <tr>
                    <td rowspan="1">2</td>
                    <td>20 to 30</td>
                </tr>
                <tr>
                    <td rowspan="1">3</td>
                    <td rowspan="1">2</td>
                    <td>40 to 60</td>
                    <td rowspan="1">Chemistry</td>
                </tr>
                <tr>
                    <td>4</td>
                    <td>3</td>
                    <td>70 to 80</td>
                    <td>Physics</td>
                </tr>
                <tr>
                    <!-- rowspan="4" overshoots: only this row and the next one remain
                         in the table. -->
                    <td rowspan="4">5</td>
                    <td rowspan="1">4</td>
                    <td>80 to 120</td>
                    <td rowspan="1">Math</td>
                </tr>
                <tr>
                    <!-- No Volume cell in this row (volume 5 is carried down from the
                         previous row), so td[1] here is the Chapter value "5". -->
                    <td rowspan="1">5</td>
                    <td>120 to 135</td>
                    <td rowspan="1">Geometry</td>
                </tr>
            </tbody>
        </table>
        <!-- Simpler Table: the Title cell spans all three data rows and no other cell
             spans rows, so Volume is td[2] in the first data row and td[1] in the
             other two. -->
        <table border="1">
            <caption>Simpler Table</caption>
            <tbody>
                <tr>
                    <th>Title</th>
                    <th>Volume</th>
                    <th>Chapter</th>
                    <th>Stds.</th>
                    <th>Dept.</th>
                </tr>
                <tr>
                    <td colspan="1" rowspan="3">Kinesiology</td>
                    <td>1</td>
                    <td>1</td>
                    <td>A to C</td>
                    <td>Strength</td>
                </tr>
                <tr>
                    <td>2</td>
                    <td>2 to 3</td>
                    <td>D to H</td>
                    <td>Agility</td>
                </tr>
                <tr>
                    <td>3</td>
                    <td>4</td>
                    <td>I to X</td>
                    <td>Flexibility</td>
                </tr>
            </tbody>
        </table>
        <!-- Simplest Table: a header row plus a single data row with no spanning
             cells; Title is td[1] and Volume is td[2] of that data row. -->
        <table border="1">
            <caption>Simplest Table</caption>
            <tbody>
                <tr>
                    <th>Title</th>
                    <th>Volume</th>
                    <th>Chapter</th>
                    <th>Stds.</th>
                    <th>Dept.</th>
                </tr>
                <tr>
                    <td>Skills</td>
                    <td>1</td>
                    <td>1</td>
                    <td>A to C</td>
                    <td>Keyboard</td>
                </tr>
            </tbody>
        </table>        
    </body>
</html>

这是期望的输出(查看渲染后的 HTML,就能看出所需的数据模式):

<?xml version="1.0" encoding="UTF-8"?>
<!-- Desired result: one book element per distinct title/volume pair, in table
     order (five for STEM, three for Kinesiology, one for Skills).
     NOTE(review): the root element here is "production", whereas the stylesheets
     shown alongside this sample write "catalog"; confirm which name is intended. -->
<production>
    <book title="STEM" volume="1"/>
    <book title="STEM" volume="2"/>
    <book title="STEM" volume="3"/>
    <book title="STEM" volume="4"/>
    <book title="STEM" volume="5"/>
    <book title="Kinesiology" volume="1"/>
    <book title="Kinesiology" volume="2"/>
    <book title="Kinesiology" volume="3"/>
    <book title="Skills" volume="1"/>
</production>

目前还不太奏效的转换样式表:

<!-- Asker's attempt (XSLT 2.0): turns each HTML table into book elements inside a
     catalog element. The cell selection in the multi-volume template is incomplete. -->
<xsl:stylesheet
    version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="xs">

    <xsl:output method="xml" encoding="UTF-8" indent="yes" />

    <!-- Document root: wrap everything the other templates produce in one catalog element. -->
    <xsl:template match="/">
        <catalog>
            <xsl:apply-templates/>
        </catalog>
    </xsl:template>

    <!-- Emit nothing for text nodes, overriding the built-in rule that would copy them. -->
    <xsl:template match="text()"/>

    <!-- multi-volume edition -->
    <xsl:template match="table">
        <!-- The first td of the table in document order. -->
        <xsl:variable name="title" select="descendant::td[1]"/>
        <!-- Bound to the table element itself; not referenced anywhere in this template. -->
        <xsl:variable name="context-td" select="."/>
        <!-- the following needs work -->
        <!-- Selects the first td of every row whose text ends in a digit. In the first
             data row that cell is the title, so volume 1 is never selected. In a row
             whose Volume cell is supplied by a rowspan from above, td[1] is the Chapter
             cell, which is then emitted as if it were a volume. -->
        <xsl:for-each select="descendant::tr/td[1][matches(.,'\d+$')]">
            <book>
                <xsl:attribute name="title" select="$title"/>
                <xsl:attribute name="volume" select="."/>
            </book>            
        </xsl:for-each>
    </xsl:template>

    <!-- single-volume edition -->
    <!-- Matches tables with fewer than three rows. The predicate gives this pattern a
         higher default priority than the plain "table" pattern, so it wins for them. -->
    <xsl:template match="table[count(descendant::tr) &lt; 3]">
        <book>
            <!-- Title is the first td of the table; volume is the second cell of the second row. -->
            <xsl:attribute name="title" select="descendant::td[1]"/>
            <xsl:attribute name="volume" select="descendant::tr[2]/td[2]"/>
        </book>            
    </xsl:template>        
</xsl:stylesheet>

for-each 中的 XPath 还需要改进。我尝试了各种轴,但没有找到适用于所有用例的写法。

3 个答案:

答案 0 :(得分:2)

难道不能简单地这样写吗:

XSLT 2.0

<!-- First attempt: take the Volume cell by fixed position in each row.
     Known flaw: it lists a volume twice when a later row has no Volume cell of its own. -->
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8" indent="yes" />

<!-- Document root: process only the tables directly under html/body, inside one catalog element. -->
<xsl:template match="/">
    <catalog>
        <xsl:apply-templates select="html/body/table"/>
    </catalog>
</xsl:template>

<xsl:template match="table">
    <!-- tr[2] is the first data row in the sample tables (tr[1] holds the th headers);
         its first cell is the title. -->
    <xsl:variable name="title" select="tbody/tr[2]/td[1]"/>
    <!-- Union of the second cell of the first data row and the first cell of every later
         row. When a later row's Volume is supplied by a rowspan from above, its first cell
         is the Chapter value instead, which is how the sample's volume 5 appears twice. -->
    <xsl:for-each select="tbody/tr[2]/td[2] | tbody/tr[position() > 2]/td[1]">
        <book>
            <xsl:attribute name="title" select="$title"/>
            <xsl:attribute name="volume" select="."/>
        </book>            
    </xsl:for-each>
</xsl:template>

</xsl:stylesheet>

哎呀,我发现有个问题:STEM 的第 5 卷被列出了两次——请稍等……


不,我没有找到简单的解决方案。我猜你必须深入分析表格的结构,把前面各行的跨行(rowspan)考虑进去——有点类似于:Please suggest for XSLT code for Table rowspan and colspan issues


编辑:

好的,我相信这应该有效:

XSLT 2.0

<!-- Revised approach: walk down the Volume column, using each Volume cell's rowspan
     to find the row that holds the next Volume cell. -->
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8" indent="yes" />

<!-- Document root: process only the tables directly under html/body, inside one catalog element. -->
<xsl:template match="/">
    <catalog>
        <xsl:apply-templates select="html/body/table"/>
    </catalog>
</xsl:template>

<!-- Start the walk at the Volume cell of the first data row (second cell of tr[2]).
     The title is passed as a tunnel parameter, so it reaches every td processed from here. -->
<xsl:template match="table">
    <xsl:apply-templates select="tbody/tr[2]/td[2]">
        <xsl:with-param name="title" select="tbody/tr[2]/td[1]" tunnel="yes"/>
    </xsl:apply-templates>
</xsl:template>

<!-- Emit a book for the current Volume cell, then move on to the next one. -->
<xsl:template match="td">
    <xsl:param name="title" tunnel="yes"/>
    <book>
        <xsl:attribute name="title" select="$title"/>
        <xsl:attribute name="volume" select="."/>
    </book>  
    <!-- A cell without a rowspan attribute occupies one row. -->
    <xsl:variable name="rowspan" select="if(@rowspan) then @rowspan else 1" />
    <!-- Skip ahead by rowspan rows and continue with that row's first cell. number() makes
         the predicate positional; a bare attribute node would be tested as a boolean.
         The recursion ends when no such row exists. -->
    <xsl:apply-templates select="parent::tr/following-sibling::tr[number($rowspan)]/td[1]"/>
</xsl:template> 

</xsl:stylesheet>

已测试,应用于修改后的输入,其形式如下:(图片:enter image description here)

http://xsltransform.net/94hvTz1/2

答案 1 :(得分:1)

我尝试了分组:

<!-- Grouping approach: let xsl:for-each-group collapse repeated volume values. -->
<xsl:stylesheet
    version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="xs">

    <xsl:output method="xml" encoding="UTF-8" indent="yes" />

    <!-- Document root: process every table in the document inside one catalog element. -->
    <xsl:template match="/">
        <catalog>
          <xsl:apply-templates select="//table"/>
        </catalog>
    </xsl:template>

    <!-- The population is the first cell of every data row. The group-by expression
         climbs to tbody and returns the same set of candidate volume cells for every
         item, so each item is placed in one group per distinct volume value.
         Inside a group, "." is its first item in population order; that is the first
         data row's first cell, i.e. the title in the sample tables.
         current-grouping-key() is the volume value. -->
    <xsl:template match="table">
            <xsl:for-each-group select="tbody/tr[position() gt 1]/td[1]" group-by="../../(tr[2]/td[2] | tr[position() gt 2]/td[1])">
              <book title="{.}" volume="{current-grouping-key()}"/>
            </xsl:for-each-group>
    </xsl:template>

</xsl:stylesheet>

答案 2 :(得分:0)

不知这个是否有帮助(在 michael.hor257k 的答案基础上稍作修改):

<!-- Variant of the positional selection that drops a candidate cell when a later row
     of the same table starts with the same text. -->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8" indent="yes" />
<!-- Document root: process only the tables directly under html/body, inside one catalog element. -->
<xsl:template match="/">
    <catalog>
        <xsl:apply-templates select="html/body/table"/>
    </catalog>
</xsl:template>
<xsl:template match="table">
    <!-- First cell of the first data row (tr[1] holds the th headers in the sample tables). -->
    <xsl:variable name="title" select="tbody/tr[2]/td[1]"/>
    <!-- Identifies this table so rows of later tables can be excluded below. -->
    <xsl:variable name="table-id" select="generate-id()"/>
    <!-- Candidates: second cell of the first data row plus the first cell of every later row. -->
    <xsl:for-each select="tbody/tr[2]/td[2] | tbody/tr[position() > 2]/td[1]">
        <xsl:variable name="curr-td" select="."/>
        <!-- following::tr reaches into later tables too, hence the generate-id() comparison
             (../../.. climbs td, tr, tbody to the table). A candidate is emitted only when
             no later row of this table has a first cell with the same text, so of several
             equal values only the last one produces a book. -->
        <xsl:if test="not(exists(following::tr[td[1][generate-id(../../..) = $table-id and . = $curr-td]]))">
            <book>
                <xsl:attribute name="title" select="$title"/>
                <xsl:attribute name="volume" select="."/>
            </book>
        </xsl:if>
    </xsl:for-each>
</xsl:template>
</xsl:stylesheet>
</xsl:stylesheet>