从元素创建新qnames的XSLT转换

时间:2010-12-30 20:32:28

标签: xml xslt coldfusion

我需要对如下所示的 XML 输出中的数据表进行转换,其中 c1 表示第 1 列,c2 表示第 2 列,依此类推。

<?xml version="1.0" encoding="UTF-8"?>
<report>
    <report_header>
        <c1>desc</c1>
        <c2>prname</c2>
        <c3>prnum</c3>
        <c4>cdate</c4>
        <c5>phase</c5>
        <c6>stype</c6>
        <c7>status</c7>
        <c8>parent</c8>
        <c9>location</c9>
    </report_header>
    <report_row>
        <c1></c1>
        <c2>IT Project Message Validation</c2>
        <c3>IT-0000021</c3>
        <c4>12/14/2010 09:56 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
    <report_row>
        <c1></c1>
        <c2>David, Michael John Morning QA Test</c2>
        <c3>IT-0000020</c3>
        <c4>12/14/2010 08:12 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
</report>

转换为

<?xml version="1.0" encoding="UTF-8"?>
<report>
    <report_row>
        <desc></desc>
        <prname>IT Project Message Validation</prname>
        <prnum>IT-0000021</prnum>
        <cdate>12/14/2010 09:56 AM</cdate>
        <phase>Preparation</phase>
        <stype>IT Projects</stype>
        <status>Active</status>
        <parent>IT</parent>
        <location>/IT/BIOMED</location>
    </report_row>
    <report_row>
        <desc></desc>
        <prname>David, Michael John Morning QA Test</prname>
        <prnum>IT-0000020</prnum>
        <cdate>12/14/2010 08:12 AM</cdate>
        <phase>Preparation</phase>
        <stype>IT Projects</stype>
        <status>Active</status>
        <parent>IT</parent>
        <location>/IT/BIOMED</location>
    </report_row>
</report>

我当前的xslt看起来像这样

<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renames the generic column elements (c1 .. c9) of every report_row to the
  text found in the matching cell of /report/report_header.

  Limits visible in this stylesheet:
    * Only c1 .. c9 are selected by the row template and only c1 .. c9 have a
      renaming template, so any further column is not emitted.
    * The header text is used verbatim as the element name, so it must already
      be a legal QName (no spaces or other illegal characters).
-->
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"  xmlns:exslt="http://exslt.org/common">
<xsl:output method="xml" indent="yes"/>

<!-- Root: emit a new <report> wrapper and process the data rows only;
     report_header is never selected, so it is not copied to the output. -->
<xsl:template match="/">
    <report>
        <xsl:apply-templates select="/report/report_row"/>          
    </report>
</xsl:template>

<!-- One output <report_row> per input row; each column is selected by name,
     in a fixed order. -->
<xsl:template match="/report/report_row">
<report_row>
    <xsl:apply-templates select="c1"/>
    <xsl:apply-templates select="c2"/>
    <xsl:apply-templates select="c3"/>
    <xsl:apply-templates select="c4"/>
    <xsl:apply-templates select="c5"/>
    <xsl:apply-templates select="c6"/>
    <xsl:apply-templates select="c7"/>
    <xsl:apply-templates select="c8"/>
    <xsl:apply-templates select="c9"/>
</report_row>
</xsl:template> 

<!-- Per-column templates: the attribute value template in name="..." reads the
     header cell with the same code and uses its text as the new element name;
     the element content is the string value of the current cell. -->
<xsl:template match="c1">   
    <xsl:element name="{/report/report_header/c1}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c2">   
    <xsl:element name="{/report/report_header/c2}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template> 
<xsl:template match="c3">   
    <xsl:element name="{/report/report_header/c3}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c4">   
    <xsl:element name="{/report/report_header/c4}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template> 
<xsl:template match="c5">   
    <xsl:element name="{/report/report_header/c5}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template> 
<xsl:template match="c6">   
    <xsl:element name="{/report/report_header/c6}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c7">   
    <xsl:element name="{/report/report_header/c7}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c8">   
    <xsl:element name="{/report/report_header/c8}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>
<xsl:template match="c9">   
    <xsl:element name="{/report/report_header/c9}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>

</xsl:transform>

只要我假设列数有一个上限,并且列标题都可以作为合法的 QName,我的转换就能正常工作。

但当列数超过我假设的 100 列上限,或者列标题中带有空格时,它就会开始失败。

如何创建使用通配符的转换,如何从列标题中删除空格和非法字符以使其成为合法的qnames?

谢谢

4 个答案:

答案 0 :(得分:4)

此转换:

<!--
  Copies the report unchanged except that:
    * report_header is dropped, and
    * every child of a report_row (c1, c2, ...) is renamed to the text of the
      header cell that has the same element name.

  The header text is sanitised before it is used as an element name:
    * every character outside [A-Za-z0-9_] becomes "_";
    * a name that would start with a digit gets a leading "_";
    * when no usable header text exists, the original column code is kept.
-->
<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <!-- Characters that may appear unchanged in a generated element name. -->
 <xsl:variable name="vAlphanum" select=
  "concat('ABCDEFGHIJKLMNOPQRSTUVWXYZ',
          'abcdefghijklmnopqrstuvwxyz',
          '_0123456789'
         )
  "/>

 <!-- Characters that are legal inside an XML name but not as its first character. -->
 <xsl:variable name="vDigits" select="'0123456789'"/>

 <!-- Pool of replacement characters for translate(). An illegal character whose
      first occurrence lies beyond this length in the list of illegal characters
      has no counterpart here, and translate() then deletes it instead of
      replacing it. The result is still a legal name either way. -->
 <xsl:variable name="vReps" select=
  "'_____________________________________'"/>

 <!-- Header text nodes indexed by the name of their parent element (c1, c2, ...). -->
 <xsl:key name="kColNameByCode"
       match="report_header/*/text()"
       use="name(..)"/>

 <!-- Identity rule: everything not matched below is copied as is. -->
 <xsl:template match="node()|@*">
  <xsl:copy>
   <xsl:apply-templates select="node()|@*"/>
  </xsl:copy>
 </xsl:template>

 <!-- Rename one data cell to its sanitised column title. -->
 <xsl:template match="report_row/*">
  <xsl:variable name="vNameText" select=
   "string(key('kColNameByCode', name()))"/>

  <!-- The inner translate() leaves only the illegal characters of the title;
       the outer translate() maps each of them to "_". -->
  <xsl:variable name="vSafe" select=
  "translate($vNameText,
             translate($vNameText,$vAlphanum,''),
             $vReps)
  "/>

  <xsl:variable name="vElName">
   <xsl:choose>
    <!-- No header cell, or an empty one: keep the original column code. -->
    <xsl:when test="not($vSafe)">
     <xsl:value-of select="local-name()"/>
    </xsl:when>
    <!-- An XML name must not start with a digit. -->
    <xsl:when test="contains($vDigits, substring($vSafe, 1, 1))">
     <xsl:value-of select="concat('_', $vSafe)"/>
    </xsl:when>
    <xsl:otherwise>
     <xsl:value-of select="$vSafe"/>
    </xsl:otherwise>
   </xsl:choose>
  </xsl:variable>

  <xsl:element name="{$vElName}">
    <xsl:value-of select="."/>
  </xsl:element>
 </xsl:template>

 <!-- The header row only supplies names; it is not part of the output. -->
 <xsl:template match="report_header"/>
</xsl:stylesheet>

应用于提供的XML文档

<report>
    <report_header>
        <c1>desc</c1>
        <c2>pr name</c2>
        <c3>pr num</c3>
        <c4>cdate</c4>
        <c5>phase</c5>
        <c6>stype</c6>
        <c7>status</c7>
        <c8>parent</c8>
        <c9>location</c9>
    </report_header>
    <report_row>
        <c1></c1>
        <c2>IT Project Message Validation</c2>
        <c3>IT-0000021</c3>
        <c4>12/14/2010 09:56 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
    <report_row>
        <c1></c1>
        <c2>David, Michael John Morning QA Test</c2>
        <c3>IT-0000020</c3>
        <c4>12/14/2010 08:12 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
</report>

生成想要的正确结果

<report>
   <report_row>
      <desc/>
      <pr_name>IT Project Message Validation</pr_name>
      <pr_num>IT-0000021</pr_num>
      <cdate>12/14/2010 09:56 AM</cdate>
      <phase>Preparation</phase>
      <stype>IT Projects</stype>
      <status>Active</status>
      <parent>IT</parent>
      <location>/IT/BIOMED</location>
   </report_row>
   <report_row>
      <desc/>
      <pr_name>David, Michael John Morning QA Test</pr_name>
      <pr_num>IT-0000020</pr_num>
      <cdate>12/14/2010 08:12 AM</cdate>
      <phase>Preparation</phase>
      <stype>IT Projects</stype>
      <status>Active</status>
      <parent>IT</parent>
      <location>/IT/BIOMED</location>
   </report_row>
</report>

请注意

  1. 该转换能够把包含任意数量不同非字母数字字符的文本转换为语法正确的 XML 名称。

  2. 使用键(xsl:key)来提高查找效率。

答案 1 :(得分:2)

我知道这个问题已经有了答案,但既然问题最初带有 coldfusion 标签,我想补充一个在 ColdFusion 中使用 StAX 的版本。如果有人在使用 XSLT 时受到内存不足(OOM)错误的困扰,这个版本可供日后参考:

<!--- see: http://today.java.net/pub/a/today/2006/07/20/introduction-to-stax.html --->
<!---
    Streams source.xml to destination.xml with StAX, renaming every cX cell of a
    report_row to the (sanitised) text of the matching cX cell in report_header.
    The header row itself is not written to the output.

    Both files live next to this template. The document is processed event by
    event, so memory use does not grow with the number of rows.
--->
<cfset baseDir = getDirectoryFromPath(getCurrentTemplatePath())>

<!--- column code (c1, c2, ...) -> legal element name taken from the header row --->
<cfset headers = {}>
<cfset isHeaderRow = true>

<cftry>
    <!--- Open the input first so that a missing source file does not create or
          truncate destination.xml. --->
    <cfset fis = createObject("java", "java.io.FileInputStream").init("#baseDir#/source.xml")>
    <cfset bis = createObject("java", "java.io.BufferedInputStream").init(fis)>
    <cfset XMLInputFactory = createObject("java", "javax.xml.stream.XMLInputFactory").newInstance()>
    <cfset reader = XMLInputFactory.createXMLStreamReader(bis)>

    <cfset fos = createObject("java", "java.io.FileOutputStream").init("#baseDir#/destination.xml")>
    <cfset bos = createObject("java", "java.io.BufferedOutputStream").init(fos)>
    <cfset XMLOutputFactory = createObject("java", "javax.xml.stream.XMLOutputFactory").newInstance()>
    <!--- State the encoding explicitly so the bytes written and the XML
          declaration below always agree. --->
    <cfset writer = XMLOutputFactory.createXMLStreamWriter(bos, "UTF-8")>
    <cfset writer.writeStartDocument("UTF-8", "1.0")>

    <cfloop condition="reader.hasNext()">
        <cfset event = reader.next()>
        <cfif event EQ reader.START_ELEMENT>
            <cfswitch expression="#reader.getLocalName()#">
                <cfcase value="report">
                    <cfset isHeaderRow = false>
                    <cfset writer.writeStartElement(reader.getLocalName())>
                </cfcase>
                <cfcase value="report_header">
                    <cfset isHeaderRow = true>
                </cfcase>
                <cfcase value="report_row">
                    <cfset writer.writeStartElement(reader.getLocalName())>
                    <cfset isHeaderRow = false>
                </cfcase>
                <!--- cX node --->
                <cfdefaultcase>
                    <!--- Read the column code before getElementText() advances
                          the reader to the END_ELEMENT of this cell. --->
                    <cfset colCode = reader.getLocalName()>
                    <cfif isHeaderRow>
                        <!--- alphanumerics and underscores only --->
                        <cfset colName = rereplacenocase(reader.getElementText(), "[^A-Z0-9\_]*", "", "all")>
                        <cfif len(colName) EQ 0>
                            <!--- nothing usable in the header: keep the column code --->
                            <cfset colName = colCode>
                        <cfelseif reFind("^[A-Za-z_]", colName) EQ 0>
                            <!--- an XML name must not start with a digit --->
                            <cfset colName = "_" & colName>
                        </cfif>
                        <cfset headers[colCode] = colName>
                    <cfelse>
                        <!--- A column without a header cell keeps its original name. --->
                        <cfif structKeyExists(headers, colCode)>
                            <cfset writer.writeStartElement(headers[colCode])>
                        <cfelse>
                            <cfset writer.writeStartElement(colCode)>
                        </cfif>
                        <!--- getElementText also consumes the cell's END_ELEMENT,
                              so the matching end tag is written here. --->
                        <cfset writer.writeCharacters(reader.getElementText())>
                        <cfset writer.writeEndElement()>
                    </cfif>
                </cfdefaultcase>
            </cfswitch>
        <cfelseif event EQ reader.END_ELEMENT>
            <!--- Only report, report_header and report_row end tags arrive here.
                  The header was never opened in the output, so its end tag is
                  skipped; leaving the header also clears the flag so that
                  </report> is still written when no rows follow. --->
            <cfif reader.getLocalName() EQ "report_header">
                <cfset isHeaderRow = false>
            <cfelse>
                <cfset writer.writeEndElement()>
            </cfif>
        </cfif>
    </cfloop>

    <cfset writer.writeEndDocument()>
    <cfset writer.flush()>
    <cfset writer.close()>
    <cfset reader.close()>

    <cfcatch type="any">
        <cfrethrow>
    </cfcatch>
    <cffinally>
        <!--- XMLStreamReader.close() and XMLStreamWriter.close() do not close the
              underlying streams, so the file handles are released here. Closing
              the buffered wrapper also closes the file stream it wraps. --->
        <cfif isDefined("bis")>
            <cfset bis.close()>
        <cfelseif isDefined("fis")>
            <cfset fis.close()>
        </cfif>
        <cfif isDefined("bos")>
            <cfset bos.close()>
        <cfelseif isDefined("fos")>
            <cfset fos.close()>
        </cfif>
    </cffinally>
</cftry>

<!--- don't do this w/a large file b/c you'll get an OOM error --->
<cffile action="read" file="#baseDir#/destination.xml" charset="utf-8" variable="transformed">
<cfdump var="#transformed#">

答案 2 :(得分:1)

您可以使用 translate 函数从名称中删除不需要的字符。

您可以使用通用的通配符模板来匹配任意子元素;通过指定模式(mode),可以避免它干扰常规的模板匹配。您可以使用 local-name 函数按元素名称查找对应的表头元素。

<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renames every child of a report_row to the text of the header cell that has
  the same local name, after deleting characters that are not allowed in an
  element name. Works for any number of columns.

  Only the XSLT namespace is declared on the root: any other namespace declared
  here would be copied onto the literal <report> element in the output.
-->
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes"/>

<!-- Characters deleted from a column title: white space, the colon and ASCII
     punctuation that cannot appear in an element name. Letters, digits, "_",
     "-" and "." are kept. -->
<xsl:variable name="unwanted"> ()[]{}&lt;&gt;!"#$%&amp;'*+,/;=?@\^`|~:&#9;&#10;&#13;</xsl:variable>

<xsl:template match="/">
    <report>
        <xsl:apply-templates select="/report/report_row"/>
    </report>
</xsl:template>

<xsl:template match="/report/report_row">
<report_row>
    <!-- Select element children only, so that the white space between cells
         is not pushed through the built-in text rule of this mode. -->
    <xsl:apply-templates select="*" mode="wildcard"/>
</report_row>
</xsl:template>

<!-- Matches any cell; the mode keeps this catch-all away from normal processing. -->
<xsl:template match="*" mode="wildcard">
    <xsl:variable name="elemname" select="local-name()"/>
    <!-- Header cell with the same local name as the current cell. -->
    <xsl:variable name="elemcontent" select="/report/report_header/*[local-name()=$elemname]"/>
    <xsl:variable name="cleaned" select="translate($elemcontent, $unwanted, '')"/>
    <xsl:variable name="newname">
        <xsl:choose>
            <!-- No header cell, or nothing left after cleaning: keep the column code. -->
            <xsl:when test="$cleaned = ''">
                <xsl:value-of select="$elemname"/>
            </xsl:when>
            <!-- A name must not start with a digit, "-" or ".". -->
            <xsl:when test="contains('0123456789-.', substring($cleaned, 1, 1))">
                <xsl:value-of select="concat('_', $cleaned)"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$cleaned"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:variable>
    <xsl:element name="{$newname}"><xsl:value-of select="current()"/></xsl:element>
</xsl:template>

</xsl:transform>

答案 3 :(得分:1)

考虑以下样式表:

<!--
  Identity transform with two overrides:
    * report_header is removed;
    * each child of a report_row is renamed to the text of the header cell with
      the same local name.

  Before use as an element name the header text is trimmed and its white space
  collapsed (normalize-space), then every space or ASCII punctuation character
  that cannot appear in a name is replaced by "_".
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes"/>
<xsl:strip-space elements="*"/>

<!-- Header cells indexed by their own local name (c1, c2, ...). -->
<xsl:key name="getHeaderText" match="report_header/*" use="local-name()"/>

<!-- Characters replaced by "_". Tabs and line breaks need no entry because
     normalize-space() has already turned them into single spaces. -->
<xsl:variable name="unsafe"> ()[]{}&lt;&gt;!"#$%&amp;'*+,/;=?@\^`|~:</xsl:variable>
<!-- At least as long as $unsafe; translate() ignores surplus characters. -->
<xsl:variable name="underscores" select="'________________________________________'"/>

<xsl:template match="node() | @*">
    <xsl:copy>
        <xsl:apply-templates select="node() | @*"/>
    </xsl:copy>
</xsl:template>

<xsl:template match="report_row/*">
    <xsl:variable name="label"
                  select="translate(normalize-space(key('getHeaderText', local-name())), $unsafe, $underscores)"/>
    <xsl:variable name="newName">
        <xsl:choose>
            <!-- No header cell, or an empty one: keep the column code. -->
            <xsl:when test="$label = ''">
                <xsl:value-of select="local-name()"/>
            </xsl:when>
            <!-- A name must not start with a digit, "-" or ".". -->
            <xsl:when test="contains('0123456789-.', substring($label, 1, 1))">
                <xsl:value-of select="concat('_', $label)"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$label"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:variable>
    <xsl:element name="{$newName}">
        <xsl:apply-templates/>
    </xsl:element>
</xsl:template>

<!-- The header row only supplies names; it produces no output. -->
<xsl:template match="report_header"/>

</xsl:stylesheet>

应用于标题中带空格的XML:

<report>
    <report_header>
        <c1>desc</c1>
        <c2>pr name</c2>
        <c3>pr num</c3>
        <c4>cdate</c4>
        <c5>phase</c5>
        <c6>stype</c6>
        <c7>status</c7>
        <c8>parent</c8>
        <c9>location</c9>
    </report_header>
    <report_row>
        <c1></c1>
        <c2>IT Project Message Validation</c2>
        <c3>IT-0000021</c3>
        <c4>12/14/2010 09:56 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
    <report_row>
        <c1></c1>
        <c2>David, Michael John Morning QA Test</c2>
        <c3>IT-0000020</c3>
        <c4>12/14/2010 08:12 AM</c4>
        <c5>Preparation</c5>
        <c6>IT Projects</c6>
        <c7>Active</c7>
        <c8>IT</c8>
        <c9>/IT/BIOMED</c9>
    </report_row>
</report>

它产生了这个结果:

<report>
    <report_row>
       <desc/>
       <pr_name>IT Project Message Validation</pr_name>
       <pr_num>IT-0000021</pr_num>
       <cdate>12/14/2010 09:56 AM</cdate>
       <phase>Preparation</phase>
       <stype>IT Projects</stype>
       <status>Active</status>
       <parent>IT</parent>
       <location>/IT/BIOMED</location>
    </report_row>
    <report_row>
       <desc/>
       <pr_name>David, Michael John Morning QA Test</pr_name>
       <pr_num>IT-0000020</pr_num>
       <cdate>12/14/2010 08:12 AM</cdate>
       <phase>Preparation</phase>
       <stype>IT Projects</stype>
       <status>Active</status>
       <parent>IT</parent>
       <location>/IT/BIOMED</location>
    </report_row>
</report>