我有一个使用TEI构建的XML文件:
<div type="chapter" n="1">
<p>
<s xml:id="e_1">sentence e1.</s>
<s xml:id="f_1">sentence f1</s>
</p>
<p>
<s xml:id="e_2"> sentence e2</s>
<s xml:id="f_2"> sentence f2</s>
</p>
</div>
<div type="chapter" n="2">
<!-- -->
</div>
我需要将其转换为此HTML结构:
<div>
<h1>Chapter 1</h1>
<div class="book-content">
<p>
<span class='source-language-sent' data-source-id='1'>sentence e1.</span>
<span id='1' style='display:none'>sentence f1</span>
</p>
<p>
<span class='source-language-sent' data-source-id='2'>sentence e2</span>
<span id='2' style='display:none'>sentence f2</span>
</p>
</div>
</div>
<div>
<h1>Chapter 2</h1>
<div class="book-content">
<!-- -->
</div>
</div>
现在我使用这个XSLT文件:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:tei="http://www.tei-c.org/ns/1.0" version="1.0">
<xsl:output method="html" encoding="UTF-8" indent="yes" />
<!-- The TEI body produces no wrapper of its own; processing simply continues with its children. -->
<xsl:template match="tei:body">
  <xsl:apply-templates select="node()" />
</xsl:template>
<!--
  Emit the TEI header as a single comment in the output.
  The header's string value is written instead of applying templates to its
  children: header descendants such as tei:p or tei:div would otherwise be
  matched by the element-producing templates in this stylesheet, and XSLT 1.0
  does not allow anything but text nodes inside xsl:comment.
-->
<xsl:template match="tei:teiHeader">
  <xsl:comment>
    <xsl:value-of select="." />
  </xsl:comment>
</xsl:template>
<!--
  Create a chapter: an outer div holding the heading "Chapter N" (N taken from
  the n attribute) followed by a div.book-content that wraps the chapter's
  transformed children. The heading is a sibling of div.book-content, not a
  child of it, so that the content wrapper contains only the paragraphs.
-->
<xsl:template match="tei:div">
  <xsl:element name="div">
    <xsl:element name="h1">
      <!-- concat keeps the separating space that an indented xsl:text could lose -->
      <xsl:value-of select="concat('Chapter ', @n)" />
    </xsl:element>
    <xsl:element name="div">
      <xsl:attribute name="class">book-content</xsl:attribute>
      <xsl:apply-templates select="node()" />
    </xsl:element>
  </xsl:element>
</xsl:template>
<!-- Create a paragraph: each TEI p becomes a p element wrapping its transformed children. -->
<xsl:template match="tei:p">
  <xsl:element name="p">
    <xsl:apply-templates select="node()" />
  </xsl:element>
</xsl:template>
<!--
  Create a sentence span from a TEI s element, driven by its xml:id:
    id starting with 'e' : visible span, class "source-language-sent", with
                           data-source-id set to the part of the id after '_'
    id starting with 'f' : hidden span (display:none) whose id attribute is
                           the part of the id after '_'
  Sentences whose id starts with neither letter produce no output.
  The number is taken with substring-after so that ids of any length are kept
  whole; a fixed-width substring would truncate long ids and make distinct
  sentences share the same number.
-->
<xsl:template match="tei:s">
  <xsl:variable name="xmlid" select="@xml:id" />
  <xsl:variable name="sentence-no" select="substring-after($xmlid, '_')" />
  <xsl:choose>
    <xsl:when test="starts-with($xmlid, 'e')">
      <xsl:element name="span">
        <xsl:attribute name="class">source-language-sent</xsl:attribute>
        <xsl:attribute name="data-source-id">
          <xsl:value-of select="$sentence-no" />
        </xsl:attribute>
        <xsl:apply-templates select="node()" />
      </xsl:element>
    </xsl:when>
    <xsl:when test="starts-with($xmlid, 'f')">
      <xsl:element name="span">
        <xsl:attribute name="style">display:none</xsl:attribute>
        <xsl:attribute name="id">
          <xsl:value-of select="$sentence-no" />
        </xsl:attribute>
        <xsl:apply-templates select="node()" />
      </xsl:element>
    </xsl:when>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
我的问题是:我需要每900个字符就创建一个新的<div class="book-content">。
但是我不想把s元素从中间截断,
因此我需要计算一个<div class="book-content">
中要放入多少个s元素,
才能凑满900个字符。
答案 0 :(得分:2)
这是一个有趣的问题,但是你的例子还有很多其他的事情在发生。我更喜欢使用我自己的例子来孤立地解决这个问题。
考虑以下输入:
**XML**
<book>
<chapter id="A">
<para>
<sentence id="1" length="23">Mary had a little lamb,</sentence>
<sentence id="2" length="29">His fleece was white as snow,</sentence>
<sentence id="3" length="30">And everywhere that Mary went,</sentence>
</para>
<para>
<sentence id="4" length="24">The lamb was sure to go.</sentence>
<sentence id="5" length="34">He followed her to school one day,</sentence>
</para>
<para>
<sentence id="6" length="27">Which was against the rule,</sentence>
<sentence id="7" length="35">It made the children laugh and play</sentence>
<sentence id="8" length="24">To see a lamb at school.</sentence>
</para>
<para>
<sentence id="9" length="34">And so the teacher turned it out, </sentence>
<sentence id="10" length="27">But still it lingered near.</sentence>
</para>
</chapter>
<chapter id="B">
<para>
<sentence id="11" length="35">Summertime, and the livin' is easy.</sentence>
<sentence id="12" length="40">Fish are jumpin' and the cotton is high.</sentence>
<sentence id="13" length="52">Oh, Your daddy's rich and your mamma's good lookin'.</sentence>
<sentence id="14" length="35">So hush little baby, don't you cry.</sentence>
<sentence id="15" length="54">One of these mornings you're going to rise up singing.</sentence>
</para>
<para>
<sentence id="16" length="57">Then you'll spread your wings and you'll take to the sky.</sentence>
<sentence id="17" length="35">So hush little baby, don't you cry.</sentence>
</para>
</chapter>
</book>
注意 :length
值仅供参考;我们不会在解决方案中使用它们。
我们的任务是:仅通过移动完整的句子,把总长度超过200个字符的每个章节拆分成若干个章节,同时保留各组句子之间原有的段落(para)边界。
XSLT 1.0
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:exsl="http://exslt.org/common"
xmlns:set="http://exslt.org/sets"
extension-element-prefixes="exsl set">
<xsl:output method="xml" version="1.0" encoding="utf-8" indent="yes"/>
<xsl:strip-space elements="*"/>
<!-- Identity rule: any node or attribute not claimed by another template is copied, then its attributes and children are processed in turn. -->
<xsl:template match="node()|@*">
  <xsl:copy>
    <xsl:apply-templates select="node()|@*"/>
  </xsl:copy>
</xsl:template>
<!-- A chapter is not copied directly; all of its sentences are handed to split-chapter, which emits the resulting chapter elements. -->
<xsl:template match="chapter">
  <xsl:call-template name="split-chapter">
    <xsl:with-param name="nodes" select="child::para/child::sentence"/>
  </xsl:call-template>
</xsl:template>
<!--
  split-chapter: emit the given sentences as one or more chapter elements, each
  holding at most $limit characters of sentence text where possible.

  Parameters:
    nodes            sentences being considered for the chapter currently built
    limit            maximum total string length per chapter (default 200);
                     passed along on every recursive call so that a value
                     supplied by the caller applies to all resulting chapters
    remaining-nodes  sentences set aside for later chapters (default: empty)

  Must be called with the source chapter as the context node, because the
  output chapter copies its id attribute from the context.
  A single sentence longer than $limit is emitted alone in its own chapter.
-->
<xsl:template name="split-chapter">
  <xsl:param name="nodes"/>
  <xsl:param name="limit" select="200"/>
  <!-- "/.." is the parent of the root, which never exists: an always-empty node-set -->
  <xsl:param name="remaining-nodes" select="/.."/>
  <!-- 1. Calculate the total length of nodes. XPath 1.0 cannot sum computed
          values directly, so the lengths are materialised as a temporary tree. -->
  <xsl:variable name="lengths">
    <xsl:for-each select="$nodes">
      <length>
        <xsl:value-of select="string-length()" />
      </length>
    </xsl:for-each>
  </xsl:variable>
  <xsl:variable name="total-length" select="sum(exsl:node-set($lengths)/length)" />
  <!-- 2. Process the chapter: -->
  <xsl:choose>
    <!-- If the chapter is too long and can be shortened ... -->
    <xsl:when test="$total-length > $limit and count($nodes) > 1">
      <!-- ... move the last sentence to the remaining set and try again. -->
      <xsl:call-template name="split-chapter">
        <xsl:with-param name="nodes" select="$nodes[not(position()=last())]"/>
        <xsl:with-param name="limit" select="$limit"/>
        <xsl:with-param name="remaining-nodes" select="$remaining-nodes | $nodes[last()]"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <!-- Otherwise create a chapter with the current nodes ... -->
      <chapter id="{@id}" length="{$total-length}" >
        <!-- ... list the paras participating in this chapter ... -->
        <xsl:for-each select="$nodes/parent::para">
          <para>
            <!-- ... and output only those sentences of the para that belong to this chapter. -->
            <xsl:apply-templates select="set:intersection(sentence, $nodes)"/>
          </para>
        </xsl:for-each>
      </chapter>
      <!-- Then start over with whatever sentences were set aside. -->
      <xsl:if test="$remaining-nodes">
        <xsl:call-template name="split-chapter">
          <xsl:with-param name="nodes" select="$remaining-nodes"/>
          <xsl:with-param name="limit" select="$limit"/>
        </xsl:call-template>
      </xsl:if>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
**结果**
<?xml version="1.0" encoding="utf-8"?>
<book>
<chapter id="A" length="167">
<para>
<sentence id="1" length="23">Mary had a little lamb,</sentence>
<sentence id="2" length="29">His fleece was white as snow,</sentence>
<sentence id="3" length="30">And everywhere that Mary went,</sentence>
</para>
<para>
<sentence id="4" length="24">The lamb was sure to go.</sentence>
<sentence id="5" length="34">He followed her to school one day,</sentence>
</para>
<para>
<sentence id="6" length="27">Which was against the rule,</sentence>
</para>
</chapter>
<chapter id="A" length="120">
<para>
<sentence id="7" length="35">It made the children laugh and play</sentence>
<sentence id="8" length="24">To see a lamb at school.</sentence>
</para>
<para>
<sentence id="9" length="34">And so the teacher turned it out, </sentence>
<sentence id="10" length="27">But still it lingered near.</sentence>
</para>
</chapter>
<chapter id="B" length="162">
<para>
<sentence id="11" length="35">Summertime, and the livin' is easy.</sentence>
<sentence id="12" length="40">Fish are jumpin' and the cotton is high.</sentence>
<sentence id="13" length="52">Oh, Your daddy's rich and your mamma's good lookin'.</sentence>
<sentence id="14" length="35">So hush little baby, don't you cry.</sentence>
</para>
</chapter>
<chapter id="B" length="146">
<para>
<sentence id="15" length="54">One of these mornings you're going to rise up singing.</sentence>
</para>
<para>
<sentence id="16" length="57">Then you'll spread your wings and you'll take to the sky.</sentence>
<sentence id="17" length="35">So hush little baby, don't you cry.</sentence>
</para>
</chapter>
</book>