使用xslt删除字符串中的特定html标记

时间:2014-05-26 06:45:58

标签: html xml string xslt

我的xml中有一个包含html格式标签的字符串。我想删除其中一部分标记(例如字体系列),但保留段落,最好也保留粗体和斜体样式。

我找到了几篇关于从字符串中删除所有html的参考资料,并使用其中的示例代码实现了这一点。但是得到的字符串不包含段落标记(这在我的上下文中很重要),而且还残留着nbsp字符。我试图创建一个变量以便对该字符串做进一步处理,但卡在了这里。

这是我在剥离所有html标签时得到的结果:

First test paragraph nisi deserunt.  Second test paragraph nisi ut aliquip ex ea commodo.  Third test paragraph dolor in reprehenderit in officia deserunt.  

这是我想要的:

First test paragraph nisi deserunt.

Second test paragraph nisi ut aliquip ex ea commodo.

Third test paragraph dolor in reprehenderit in officia deserunt

是否可以使用xslt从字符串中删除特定的html标签,同时将其他标签保留在原处(或替换它们),从而得到一个只包含所需html的字符串?我使用的是XSLT 1.0,处理器是应用程序内嵌的IE。我是xslt的新手,非常感谢任何帮助。

这是我的xslt:

<?xml version="1.0" encoding="UTF-8"?>
 <xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<!--
  Entry template for /xml/exhibition: builds the HTML page skeleton and
  calls output_by_section once for each distinct non-empty subtheme value.
-->
<xsl:template match="/xml/exhibition">
  <html>
    <head/>
    <body>
      <!-- Visit every object that has a non-empty subtheme, ordered by subtheme. -->
      <xsl:for-each select="objects/object[string-length(subtheme) != 0]">
        <xsl:sort select="subtheme"/>
        <xsl:variable name="sectionName" select="string(subtheme)"/>

        <!-- Only an object with no earlier sibling (in document order) sharing
             its subtheme triggers the section output, so each subtheme is
             rendered exactly once. -->
        <xsl:if test="not(preceding-sibling::*[subtheme = $sectionName])">
          <xsl:call-template name="output_by_section">
            <xsl:with-param name="field" select="$sectionName"/>
            <xsl:with-param name="parent" select=".."/>
          </xsl:call-template>
        </xsl:if>
      </xsl:for-each>
    </body>
  </html>
</xsl:template>

<!--
  RemoveHtml: outputs the text of $String with every "<...>" tag removed.

  Parameter:
    String   text that may contain markup written as literal "<" and ">"
             characters.

  Result: the tag-free text with leading/trailing whitespace trimmed and
  each internal run of whitespace collapsed to a single space.

  Whitespace is normalised once over the complete result rather than on
  every remainder, so a space that directly follows a tag is kept and words
  on either side of a tag are no longer glued together
  ("dolore</SPAN><SPAN> eu" gives "dolore eu").
-->
<xsl:template name="RemoveHtml">
  <xsl:param name="String"/>
  <xsl:variable name="untagged">
    <xsl:call-template name="RemoveHtmlTags">
      <xsl:with-param name="String" select="$String"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:value-of select="normalize-space($untagged)"/>
</xsl:template>

<!--
  RemoveHtmlTags: private helper for RemoveHtml. Recursively outputs the text
  found outside "<...>" spans without touching whitespace.
-->
<xsl:template name="RemoveHtmlTags">
  <xsl:param name="String"/>
  <xsl:choose>
    <xsl:when test="contains($String, '&lt;')">
      <!-- Text in front of the next tag is kept as is. -->
      <xsl:value-of select="substring-before($String, '&lt;')"/>
      <!-- Look for the closing ">" only after the "<" just found; a ">" that
           occurs earlier in plain text must not be taken for the end of the
           tag. A "<" with no ">" after it leaves an empty remainder, so the
           unterminated tag is dropped. -->
      <xsl:call-template name="RemoveHtmlTags">
        <xsl:with-param name="String"
          select="substring-after(substring-after($String, '&lt;'), '&gt;')"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$String"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!--
  output_by_section: renders every object of one section.

  Parameters:
    parent   node-set whose "object" children are examined.
    field    subtheme value; only objects with a subtheme equal to it are
             output.

  For each matching object two div elements are written: the tag-stripped
  text of textentryhtml, split into paragraphs, followed by the original
  textentryhtml markup for reference.
-->
<xsl:template name="output_by_section">
    <xsl:param name="parent"/>
    <xsl:param name="field"/>

    <xsl:for-each select="$parent/object[subtheme=$field]">
        <div style="margin-bottom: 20px">
            <!-- Text of the entry with all tags removed by RemoveHtml. -->
            <xsl:variable name="stripped">
                <xsl:call-template name="RemoveHtml">
                    <xsl:with-param name="String" select="textentryhtml"/>
                </xsl:call-template>
            </xsl:variable>
            <xsl:choose>
                <!-- The literal text "&nbsp;" left in the stripped string
                     marks a paragraph boundary. Match the whole token,
                     ampersand included, so no stray "&" ends up in the
                     output, and split at every occurrence rather than only
                     the first. -->
                <xsl:when test="contains($stripped, '&amp;nbsp;')">
                    <xsl:call-template name="output_paragraphs">
                        <xsl:with-param name="text" select="string($stripped)"/>
                    </xsl:call-template>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:value-of select="$stripped"/>
                </xsl:otherwise>
            </xsl:choose>
        </div>
        <div>
            <!-- Formatted text with its full HTML, for reference.
                 NOTE(review): escaping is disabled, so the stored markup is
                 written to the page unfiltered; confirm the content is
                 trusted. -->
            <xsl:value-of select="textentryhtml" disable-output-escaping="yes"/>
        </div>
    </xsl:for-each>
</xsl:template>

<!--
  output_paragraphs: private helper for output_by_section. Splits $text at
  each occurrence of $delimiter and writes every non-blank piece as a "p"
  element; blank pieces (for example after a trailing delimiter) are skipped.
  $delimiter must not be empty, otherwise the recursion never ends.
-->
<xsl:template name="output_paragraphs">
    <xsl:param name="text"/>
    <xsl:param name="delimiter" select="'&amp;nbsp;'"/>
    <xsl:choose>
        <xsl:when test="contains($text, $delimiter)">
            <xsl:variable name="piece"
                select="normalize-space(substring-before($text, $delimiter))"/>
            <xsl:if test="$piece != ''">
                <p><xsl:value-of select="$piece"/></p>
            </xsl:if>
            <xsl:call-template name="output_paragraphs">
                <xsl:with-param name="text" select="substring-after($text, $delimiter)"/>
                <xsl:with-param name="delimiter" select="$delimiter"/>
            </xsl:call-template>
        </xsl:when>
        <xsl:when test="normalize-space($text) != ''">
            <p><xsl:value-of select="normalize-space($text)"/></p>
        </xsl:when>
    </xsl:choose>
</xsl:template>
</xsl:stylesheet>

这是相关xml的示例:

<?xml version="1.0"?>
<xml>
  <exhibition>
    <objects>
      <object>
        <textentryhtml>
          &lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;&lt;FONT size=3&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;&lt;?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" /&gt;&lt;o:p&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;First test paragraph nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt.&lt;/SPAN&gt;
          &lt;P style="MARGIN: 0cm 0cm 0pt" class=MsoNormal&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;&lt;o:p&gt;&amp;nbsp;&lt;/o:p&gt;&lt;/SPAN&gt;&lt;/P&gt;
          &lt;P style="MARGIN: 0cm 0cm 0pt" class=MsoNormal&gt;&lt;I style="mso-bidi-font-style: normal"&gt;&lt;SPAN style="COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Calibri; mso-fareast-language: EN-AU"&gt;&lt;FONT face=Calibri&gt;Second test paragraph nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in. &lt;o:p&gt;&lt;/o:p&gt;&lt;/FONT&gt;&lt;/SPAN&gt;&lt;/I&gt;&lt;/P&gt;
          &lt;P style="MARGIN: 0cm 0cm 0pt" class=MsoNormal&gt;&lt;SPAN style="FONT-FAMILY: 'Times New Roman','serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-fareast-language: EN-AU"&gt;&lt;o:p&gt;&amp;nbsp;&lt;/o:p&gt;&lt;/SPAN&gt;&lt;/P&gt;
          &lt;P style="MARGIN: 0cm 0cm 2.25pt" class=MsoNormal&gt;&lt;B style="mso-bidi-font-weight: normal"&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;Third test paragraph &lt;/SPAN&gt;&lt;/B&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in &lt;/SPAN&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 20pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt;voluptate velit esse cillum dolore&lt;/SPAN&gt;&lt;SPAN style="FONT-FAMILY: 'ITCFranklinGothic LT Com Bk','sans-serif'; COLOR: black; FONT-SIZE: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-bidi-font-family: Arial; mso-fareast-language: EN-AU"&gt; eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt.&lt;/SPAN&gt;&lt;/P&gt;
          &lt;P style="MARGIN: 0cm 0cm 0pt" class=MsoNormal&gt;&lt;/SPAN&gt;&lt;/o:p&gt;&lt;/SPAN&gt;&amp;nbsp;&lt;/P&gt;&lt;/FONT&gt;&lt;/SPAN&gt;
        </textentryhtml>
      </object>
    </objects>
  </exhibition>
</xml>

</xsl:template>
</xsl:stylesheet>

0 个答案:

没有答案