将输入的XHTML按片段拆分为包含祖先节点信息的独立XHTML文件

时间:2012-10-23 22:48:17

标签: xml xslt-2.0 xpath-2.0

有人可以帮我解决这个问题吗?非常感谢。

要求:

  1. 为源文件中的每个pal:fragment元素生成一个单独的html文件,文件名取自该pal:fragment元素上的@fragment-id属性。
  2. 'pal:fragment'可以嵌套,并且可以被'div'元素包裹。
  3. 片段文件不仅应包含pal:fragment的所有子元素,还应包含它的祖先'div'元素。从'pal:fragment'生成的每个单独文件都不应包含其后代'pal:fragment'的内容。
  4. 的内容

    请参阅下面的需求理解示例

    示例1:如果为fragment-id="DLM3989039"的"pal:fragment"生成片段文件,则内容应如下所示(为便于理解,已省略html外层标记)

    <div class="bill" id="DLM3988900">
        <div class="schedule-group">
            <div class="schedule" id="DLM3989039">
                <p>DLM3989039 dummy text </p>
                <div class="schedule-provisions"></div>
            </div>
        </div>
    </div>
    

    示例2 :如果为“pal:fragment”fragment-id =“DLM3989044”生成片段文件,则内容应如下所示。 (删除了html标记以便于理解)

    <div class="bill" id="DLM3988900">
        <div class="schedule-group">
            <div class="schedule" id="DLM3989039">
                <div class="schedule-provisions">
                    <div class="part" id="DLM3989044">
                        <p>DLM3989044 dummy content</p>
                    </div>
                </div>
            </div>
        </div>
    </div>      
    

    我使用的XML文档:

    <?xml version="1.0" encoding="UTF-8"?>
    <div class="bill" id="DLM3988900" xmlns:pal="http://www.google.nz/rendition-info"
        xmlns="http://www.w3.org/1999/xhtml">
        <div class="billdetail">
            <pal:fragment fragment-id="DLM3988901" fragment-type="explnote">
                <div class="explnote" id="DLM3988901">
                    <p>DLM3988901 dummy text</p>
                </div>
            </pal:fragment>
        </div>
        <pal:fragment fragment-id="DLM3988906" fragment-type="contents">
            <div class="cover" id="DLM3988906">
                <p>DLM3988906 dummy text</p>
            </div>
            <div class="body" id="DLM3988910">
                <pal:fragment fragment-id="DLM3988963" fragment-type="part">
                    <div class="part" id="DLM3988963">
                        <p>DLM3988963 dummy text</p>
                        <pal:fragment fragment-id="DLM3988965" fragment-type="prov">
                            <div class="prov" id="DLM3988965">
                                <p>DLM3988965 dummy text</p>
                            </div>
                        </pal:fragment>
                    </div>
                </pal:fragment>
                <pal:fragment fragment-id="DLM3989003" fragment-type="part">
                    <div class="part" id="DLM3989003">
                        <p>DLM3989003 dummy text</p>
                        <pal:fragment fragment-id="DLM3989004" fragment-type="subpart">
                            <div class="subpart" id="DLM3989004">
                                <p>DLM3989004 dummy text</p>
                                <pal:fragment fragment-id="DLM3989005" fragment-type="prov">
                                    <div class="prov" id="DLM3989005">
                                        <p>DLM3989005 dummy text</p>
                                    </div>
                                </pal:fragment>
                            </div>
                        </pal:fragment>
                    </div>
                </pal:fragment>
            </div>
        </pal:fragment>
        <div class="schedule-group">
            <pal:fragment fragment-id="DLM3989039" fragment-type="schedule">
                <div class="schedule" id="DLM3989039">
                    <p>DLM3989039 dummy text </p>
                    <div class="schedule-provisions">
                        <pal:fragment fragment-id="DLM3989044" fragment-type="part">
                            <div class="part" id="DLM3989044">
                                <p>DLM3989044 dummy content</p>
                                <pal:fragment fragment-id="DLM3989057" fragment-type="subpart">
                                    <div class="subpart" id="DLM3989057">
                                        <p>DLM3989057 dummy content</p>
                                        <pal:fragment fragment-id="DLM3989059" fragment-type="prov">
                                            <div class="prov" id="DLM3989059">
                                                <p> DLM3989059 dummy coent</p>
                                            </div>
                                        </pal:fragment>
                                    </div>
                                </pal:fragment>
                            </div>
                        </pal:fragment>
                    </div>
                </div>
            </pal:fragment>
        </div>
    </div>
    

    =============

    我创建的XSLT样式表:

    <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:foo="http://www.google.nz/rendition-info"
        xmlns:html="http://www.w3.org/1999/xhtml" exclude-result-prefixes="xs foo html" version="2.0">
    
        <!-- Strip whitespace-only text nodes from every element of the source document. -->
        <xsl:strip-space elements="*"/>
    
        <!-- Optional output directory; the empty-string default means "write next to the input". -->
        <xsl:param name="target_directory" select="''" as="xs:string"/>
        <!-- Source document URI up to (not including) the first occurrence of $input_file_name. -->
        <xsl:param name="input_doc_loc"
            select="xs:anyURI(substring-before(document-uri(/),$input_file_name))" as="xs:anyURI"/>
        <!-- Directory prefix for result files: $input_doc_loc when no target directory is given,
             otherwise $target_directory with a trailing '/'. -->
        <xsl:param name="output_dir" as="xs:anyURI">
            <xsl:choose>
                <xsl:when test="$target_directory = ''">
                    <xsl:value-of select="$input_doc_loc"/>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:value-of select="concat($target_directory, '/')"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:param>
    
        <!-- File name of the source document, computed by get_file_name from document-uri(/). -->
        <xsl:param name="input_file_name" as="xs:anyURI">
            <xsl:variable name="filename">
                <xsl:call-template name="get_file_name">
                    <xsl:with-param name="file_name" select="document-uri(/)" as="xs:anyURI"/>
                </xsl:call-template>
            </xsl:variable>
            <xsl:value-of select="$filename"/>
        </xsl:param>
    
        <!--
          Returns the part of $file_name after its last '/'.
          Recursive: while the value still contains '/', everything up to and including
          the first '/' is dropped and the template calls itself on the remainder.
          Param file_name: the URI (or remaining tail of it) to reduce.
        -->
        <xsl:template name="get_file_name">
            <xsl:param name="file_name" as="xs:anyURI"/>
            <xsl:choose>
                <xsl:when test="contains($file_name, '/')">
                    <xsl:call-template name="get_file_name">
                        <xsl:with-param name="file_name"
                            select="xs:anyURI(substring-after($file_name, '/'))"/>
                    </xsl:call-template>
                </xsl:when>
                <xsl:otherwise>
                    <!-- No '/' left: this is the final path segment. -->
                    <xsl:value-of select="$file_name"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:template>
    
        <!-- Extension appended to each fragment-id to form the result file name. -->
        <xsl:param name="frag_file_ext" select="'.html'" as="xs:string"/>
    
        <!-- Default (unnamed) output definition: HTML serialization, media type text/html. -->
        <xsl:output media-type="text/html" method="html"/>
    
        <!--
          Writes one result document for the context fragment element.
          The file name is @fragment-id followed by $frag_file_ext, prefixed with $output_dir.
          The body of the document is the node sequence held in $ancestor-divs.
        -->
        <xsl:template name="generate_output_doc">
            <xsl:variable name="fragment_name" as="xs:anyURI"
                select="xs:anyURI(concat(@fragment-id,$frag_file_ext))"/>
            <xsl:variable name="fragment_file" as="xs:anyURI"
                select="xs:anyURI(concat($output_dir,$fragment_name))"/>
    
            <!-- NOTE(review): child-content is never referenced in this template. -->
            <xsl:variable name="child-content" select="node()"/>
    
            <!--
              Two independent results are placed side by side in this sequence:
              first the div-content rendering of the parent div, then the default-mode
              rendering of the fragment's children. Nothing inserts the second into the
              first, so ancestors and fragment content are emitted as siblings.
              NOTE(review): nested fragments reached through the second apply-templates
              invoke xsl:result-document while this variable is being evaluated; XSLT 2.0
              classifies that as error XTDE1480 (temporary output state). Confirm how the
              processor in use treats it.
            -->
            <xsl:variable name="ancestor-divs" as="node()*">
                <xsl:apply-templates select="parent::html:div[1]" mode="div-content"/>
                <xsl:apply-templates select="node()"/>
            </xsl:variable>
            <!-- NOTE(review): computed but not used; the result document below reads $ancestor-divs. -->
            <xsl:variable name="reverse-ancestor-divs" as="node()*">
                <xsl:sequence select="reverse($ancestor-divs)"/>
            </xsl:variable>
    
    
            <xsl:result-document href="{$fragment_file}">
                <html xmlns="http://www.w3.org/1999/xhtml">
                    <head>
                        <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
                    </head>
                    <body>
                        <xsl:sequence select="$ancestor-divs"/>
                    </body>
                </html>
            </xsl:result-document>
        </xsl:template>
    
        <!-- Every fragment element (the foo prefix is bound to the rendition-info namespace)
             is handed to generate_output_doc; the template itself emits nothing else. -->
        <xsl:template match="foo:fragment">
            <xsl:call-template name="generate_output_doc"/>
        </xsl:template>
    
        <!-- Default mode: copy an XHTML div and recurse into its attributes and children. -->
        <xsl:template match="html:div">
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
        </xsl:template>
    
        <!--
          Mode div-content: copies the current div with its attributes, then processes the
          nearest ancestor div in the same mode INSIDE that copy.
          NOTE(review): because the recursion happens inside xsl:copy, the outer ancestor is
          written as a child of the inner one, i.e. the ancestor chain comes out inverted.
        -->
        <xsl:template match="html:div" mode="div-content">
            <xsl:copy>
                <xsl:apply-templates select="@*"/>
                <xsl:apply-templates select="ancestor::html:div[1]" mode="div-content"/>
            </xsl:copy>
        </xsl:template>
    
        <!-- The standard identity template -->
        <!-- Matches elements and attributes only; copies the node and recurses into its
             attributes and child nodes. Text nodes are not matched by this pattern. -->
        <xsl:template match="*| @*">
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
        </xsl:template>
    
    </xsl:stylesheet>
    

    ============

    使用上面给出的示例文件时,生成的单独html文件数量为"11",这个数量符合预期。但我的XSLT样式表存在问题,生成的输出如下所述:

    1. 'foo:fragment'的祖先节点没有正确嵌套,片段内容也没有作为其子/后代节点包含在内,而是被写成了兄弟节点。
    2. 下面是由fragment-id为"DLM3989039"的'pal:fragment'元素生成的单独html文件:

      <html xmlns="http://www.w3.org/1999/xhtml">
         <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
         </head>
         <body>
            <div class="schedule-group">
               <div class="bill" id="DLM3988900"></div>
            </div>
            <div xmlns:foo="http://www.google.nz/rendition-info" class="schedule" id="DLM3989039">
               <p>DLM3989039 dummy text </p>
               <div class="schedule-provisions"></div>
            </div>
         </body>
      </html>
      

      谢谢和问候, 苏雷什。

1 个答案:

答案 0 :(得分:0)

我不确定是否完全理解了您的要求,但以下是我根据对您目标的理解所做的实现:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:pal="http://www.google.nz/rendition-info"
    xmlns:html="http://www.w3.org/1999/xhtml" exclude-result-prefixes="xs pal html" version="2.0"
    xmlns="http://www.w3.org/1999/xhtml">

<!-- Extension appended to each fragment-id to form the result file name. -->
<xsl:param name="frag_file_ext" select="'.html'" as="xs:string"/>

<!-- Default output definition: indented XHTML serialization without an XML declaration. -->
<xsl:output method="xhtml" indent="yes" media-type="text/html" omit-xml-declaration="yes"/>
<!-- Strip whitespace-only text nodes from every element of the source document. -->
<xsl:strip-space elements="*"/>

<!-- Entry point: hand every pal:fragment in the document, at any depth and in
     document order, to the "doc" mode, which writes one result file per fragment. -->
<xsl:template match="/">
  <xsl:apply-templates select="descendant::pal:fragment" mode="doc"/>
</xsl:template>

<!--
  Mode "doc": writes the result document for one pal:fragment.
  The file name is @fragment-id followed by $frag_file_ext (relative to the base output URI).
  Processing starts at the outermost ancestor XHTML div so that the whole ancestor chain is
  rebuilt around the fragment; the fragment itself travels as tunnel parameter doc-frag so
  that the default-mode templates can tell which branch of the tree to keep.
-->
<xsl:template match="pal:fragment" mode="doc">
  <xsl:result-document href="{@fragment-id}{$frag_file_ext}">
      <html>
          <head>
          </head>
          <body>
              <!-- Fall back to the fragment itself when no div encloses it; selecting only
                   ancestor::html:div[last()] would leave the body empty in that case. -->
              <xsl:apply-templates select="(ancestor::html:div[last()], .)[1]">
                <xsl:with-param name="doc-frag" as="element(pal:fragment)" select="current()" tunnel="yes"/>
              </xsl:apply-templates>
          </body>
      </html>
  </xsl:result-document>
</xsl:template>

<!-- Identity rule for every attribute and child node not handled by a more specific
     template: shallow-copy the node, then process its attributes followed by its children. -->
<xsl:template match="node() | @*">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>

<!--
  Rebuilds an XHTML element for the result document of the tunnelled fragment doc-frag.
    * The element is an ancestor of doc-frag: recreate it with its attributes and descend
      only into the child elements that lead to doc-frag (siblings and text are dropped).
    * The element lies inside doc-frag: recreate it with all attributes and child nodes.
    * Anything else produces no output.
  The element is recreated with xsl:element rather than xsl:copy so that the source
  namespace declarations (such as xmlns:pal) are not carried into the result. The
  namespace attribute takes the namespace from the source node itself; without it the
  prefix returned by name() would be resolved against the stylesheet's declarations,
  which fails for input that uses a prefix the stylesheet does not declare.
-->
<xsl:template match="html:*">
  <xsl:param name="doc-frag" as="element(pal:fragment)" tunnel="yes"/>
  <xsl:variable name="doc-anc" as="element()*" select="$doc-frag/ancestor-or-self::*"/>
  <xsl:choose>
    <!-- Ancestor of the target fragment: keep only the path towards it. -->
    <xsl:when test="$doc-anc[. is current()]">
      <xsl:element name="{name()}" namespace="{namespace-uri()}">
        <xsl:apply-templates select="@* , *[descendant-or-self::*[. is $doc-frag]]"/>
      </xsl:element>
    </xsl:when>
    <!-- Content of the target fragment: keep everything; nested fragments are filtered
         out by the pal:fragment template. -->
    <xsl:when test="ancestor::pal:fragment[. is $doc-frag]">
      <xsl:element name="{name()}" namespace="{namespace-uri()}">
        <xsl:apply-templates select="@* , node()"/>
      </xsl:element>
    </xsl:when>
  </xsl:choose>
</xsl:template>

<!--
  Default mode: a pal:fragment wrapper is never written to the result.
  Its children are processed only when the wrapper is the tunnelled target fragment
  or one of that fragment's ancestors; any other fragment (a sibling branch or a
  fragment nested inside the target) contributes nothing.
-->
<xsl:template match="pal:fragment">
  <xsl:param name="doc-frag" as="element(pal:fragment)" tunnel="yes"/>
  <xsl:variable name="on-target-path" as="xs:boolean"
                select="exists($doc-frag/ancestor-or-self::pal:fragment intersect .)"/>
  <xsl:if test="$on-target-path">
    <xsl:apply-templates select="node()"/>
  </xsl:if>
</xsl:template>

</xsl:stylesheet>

当我使用Saxon 9.4 HE将该样式表应用于以下输入时

<?xml version="1.0" encoding="UTF-8"?>
<div class="bill" id="DLM3988900" xmlns:pal="http://www.google.nz/rendition-info"
    xmlns="http://www.w3.org/1999/xhtml">
    <div class="billdetail">
        <pal:fragment fragment-id="DLM3988901" fragment-type="explnote">
            <div class="explnote" id="DLM3988901">
                <p>DLM3988901 dummy text</p>
            </div>
        </pal:fragment>
    </div>
    <pal:fragment fragment-id="DLM3988906" fragment-type="contents">
        <div class="cover" id="DLM3988906">
            <p>DLM3988906 dummy text</p>
        </div>
        <div class="body" id="DLM3988910">
            <pal:fragment fragment-id="DLM3988963" fragment-type="part">
                <div class="part" id="DLM3988963">
                    <p>DLM3988963 dummy text</p>
                    <pal:fragment fragment-id="DLM3988965" fragment-type="prov">
                        <div class="prov" id="DLM3988965">
                            <p>DLM3988965 dummy text</p>
                        </div>
                    </pal:fragment>
                </div>
            </pal:fragment>
            <pal:fragment fragment-id="DLM3989003" fragment-type="part">
                <div class="part" id="DLM3989003">
                    <p>DLM3989003 dummy text</p>
                    <pal:fragment fragment-id="DLM3989004" fragment-type="subpart">
                        <div class="subpart" id="DLM3989004">
                            <p>DLM3989004 dummy text</p>
                            <pal:fragment fragment-id="DLM3989005" fragment-type="prov">
                                <div class="prov" id="DLM3989005">
                                    <p>DLM3989005 dummy text</p>
                                </div>
                            </pal:fragment>
                        </div>
                    </pal:fragment>
                </div>
            </pal:fragment>
        </div>
    </pal:fragment>
    <div class="schedule-group">
        <pal:fragment fragment-id="DLM3989039" fragment-type="schedule">
            <div class="schedule" id="DLM3989039">
                <p>DLM3989039 dummy text </p>
                <div class="schedule-provisions">
                    <pal:fragment fragment-id="DLM3989044" fragment-type="part">
                        <div class="part" id="DLM3989044">
                            <p>DLM3989044 dummy content</p>
                            <pal:fragment fragment-id="DLM3989057" fragment-type="subpart">
                                <div class="subpart" id="DLM3989057">
                                    <p>DLM3989057 dummy content</p>
                                    <pal:fragment fragment-id="DLM3989059" fragment-type="prov">
                                        <div class="prov" id="DLM3989059">
                                            <p> DLM3989059 dummy coent</p>
                                        </div>
                                    </pal:fragment>
                                </div>
                            </pal:fragment>
                        </div>
                    </pal:fragment>
                </div>
            </div>
        </pal:fragment>
    </div>
</div>

我得到11个DLMxxx.html结果文件

Mode                LastWriteTime     Length Name
----                -------------     ------ ----
-a---        24.10.2012     11:38        387 DLM3988901.html
-a---        24.10.2012     11:38        375 DLM3988906.html
-a---        24.10.2012     11:38        393 DLM3988963.html
-a---        24.10.2012     11:38        468 DLM3988965.html
-a---        24.10.2012     11:38        393 DLM3989003.html
-a---        24.10.2012     11:38        471 DLM3989004.html
-a---        24.10.2012     11:38        552 DLM3989005.html
-a---        24.10.2012     11:38        447 DLM3989039.html
-a---        24.10.2012     11:38        549 DLM3989044.html
-a---        24.10.2012     11:38        639 DLM3989057.html
-a---        24.10.2012     11:38        731 DLM3989059.html

其中,例如DLM3988901.html的内容为:

<html xmlns="http://www.w3.org/1999/xhtml">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
   </head>
   <body>
      <div class="bill" id="DLM3988900">
         <div class="billdetail">
            <div class="explnote" id="DLM3988901">
               <p>DLM3988901 dummy text</p>
            </div>
         </div>
      </div>
   </body>
</html>

DLM3989044.html

<html xmlns="http://www.w3.org/1999/xhtml">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
   </head>
   <body>
      <div class="bill" id="DLM3988900">
         <div class="schedule-group">
            <div class="schedule" id="DLM3989039">
               <div class="schedule-provisions">
                  <div class="part" id="DLM3989044">
                     <p>DLM3989044 dummy content</p>
                  </div>
               </div>
            </div>
         </div>
      </div>
   </body>
</html>