如何提高XSLT 1.0中的分组/排序性能

时间:2012-12-11 03:56:49

标签: xslt muenchian-grouping

我的XML如下所示:

<!--
  Trimmed sample of the input document. Each HIERARCHY value has the form
    LEVEL1:id+name#sort~LEVEL2:id+name~LEVEL3:id+name~
  The stylesheets that consume this input also read an ID child of DOCUMENT,
  which this trimmed sample omits.
-->
<autnresponse xmlns:autn="http://schemas.autonomy.com/aci/">
    <responsedata>
        <autn:hit>
            <autn:content>
                <DOCUMENT>
                    <DRETITLE>Request Audio Visual Services</DRETITLE>
                    <HIERARCHY>LEVEL1:8+My Workplace#4~LEVEL2:33+Meetings~LEVEL3:151+Book meeting room~</HIERARCHY>
                    <HIERARCHY>LEVEL1:8+My Workplace#4~LEVEL2:33+Meetings~LEVEL3:154+Support for Meeting Room Equipment~</HIERARCHY>
                </DOCUMENT>
            </autn:content>
        </autn:hit>
        <autn:hit>
            <autn:content>
                <DOCUMENT>
                    <DRETITLE>View outage calendar for SAP ICERPS system</DRETITLE>
                    <HIERARCHY>LEVEL1:4+IT Services &amp; Access#1~LEVEL2:8+IT for IT~LEVEL3:45+SAP Services~</HIERARCHY>
                    <HIERARCHY>LEVEL1:4+IT Services &amp; Access#1~LEVEL2:21+Corp and Consumer Systems~LEVEL3:85+Market and Sell systems~</HIERARCHY>
                    <HIERARCHY>LEVEL1:4+IT Services &amp; Access#1~LEVEL2:23+Support Function Systems~LEVEL3:128+SAP Systems~</HIERARCHY>
                    <HIERARCHY>LEVEL1:4+IT Services &amp; Access#1~LEVEL2:23+Support Function Systems~LEVEL3:129+Supply Chain Planning Services Systems ~</HIERARCHY>
                </DOCUMENT>
            </autn:content>
        </autn:hit>
    </responsedata>
</autnresponse>

请注意,每个DOCUMENT下有多个HIERARCHY元素,其值是由 level1~level2~level3 拼接而成的字符串。每个级别的格式为 LEVELn:LevelID+LevelName。LEVEL1 在"#"之后还带有一个额外的值,表示排序序号。基本上,我需要按这个"#"后面的数字对所有LEVEL1进行排序,其余级别则按名称的字母顺序排序。

我希望将此转换为类似的东西 -

<!--
  Desired result: LEVEL1 groups ordered by the number after '#' in the
  source HIERARCHY string, LEVEL2 and LEVEL3 groups ordered by name, and
  one ITEM per DOCUMENT that belongs to the LEVEL3 group.
-->
<TREE>
    <LEVEL1 name="IT Services &amp; Access" id="4">
        <LEVEL2 name="Corp and Consumer Systems" id="21">
            <LEVEL3 name="Market and Sell systems" id="85">
                <ITEM id="1000" name="View outage calendar for SAP ICERPS system"/>
            </LEVEL3>
        </LEVEL2>
        <LEVEL2 name="IT for IT" id="8">
            <LEVEL3 name="SAP Services" id="45">
                <ITEM id="1000" name="View outage calendar for SAP ICERPS system"/>
            </LEVEL3>
        </LEVEL2>
        <LEVEL2 name="Support Function Systems" id="23">
            <LEVEL3 name="SAP Systems" id="128">
                <ITEM id="1000" name="View outage calendar for SAP ICERPS system"/>
            </LEVEL3>
            <LEVEL3 name="Supply Chain Planning Services Systems " id="129">
                <ITEM id="1000" name="View outage calendar for SAP ICERPS system"/>
            </LEVEL3>
        </LEVEL2>
    </LEVEL1>
    <LEVEL1 name="My Workplace" id="8">
        <LEVEL2 name="Meetings" id="33">
            <LEVEL3 name="Book meeting room" id="151">
                <ITEM id="100" name="Request Audio Visual Services"/>
            </LEVEL3>
            <LEVEL3 name="Support for Meeting Room Equipment" id="154">
                <ITEM id="100" name="Request Audio Visual Services"/>
            </LEVEL3>
        </LEVEL2>
    </LEVEL1>
</TREE>

基本上,每个DOCUMENT都关联着多个HIERARCHY。我需要把它们归并到一起,并在每个级别上分别进行分组。

我的XSL看起来像这样 -

<?xml version="1.0" encoding="utf-8"?>
<!--
  Builds a TREE of LEVEL1 / LEVEL2 / LEVEL3 / ITEM elements from the
  HIERARCHY strings of every DOCUMENT, using Muenchian grouping.

  Each HIERARCHY value has the form
    LEVEL1:id+name#sort~LEVEL2:id+name~LEVEL3:id+name~
  LEVEL1 groups are ordered numerically by the value after '#', LEVEL2 and
  LEVEL3 groups by name, and ITEMs by the DRETITLE of their DOCUMENT.

  Every nested loop starts from a key() lookup rather than re-scanning all
  HIERARCHY elements of the document, so the string predicates only run on
  the members of the enclosing group.

  NOTE(review): the MIDLEVEL and BOTTOMLEVEL keys are not qualified by the
  parent level. An "id+name" value that recurs under a different parent is
  therefore emitted only under the parent where it occurs first in document
  order. This is fine if level ids are globally unique; confirm.
-->
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:autn="http://schemas.autonomy.com/aci/"
    exclude-result-prefixes="autn">

  <xsl:output method="xml" omit-xml-declaration="yes"/>

  <xsl:strip-space elements="*"/>

  <!-- "id+name" of each level, as it appears in the HIERARCHY string. -->
  <xsl:key name="TOPLEVEL"
           match="autnresponse/responsedata/autn:hit/autn:content/DOCUMENT/HIERARCHY"
           use="substring-before(substring-after(.,'LEVEL1:'),'#')"/>
  <xsl:key name="MIDLEVEL"
           match="autnresponse/responsedata/autn:hit/autn:content/DOCUMENT/HIERARCHY"
           use="substring-before(substring-after(.,'LEVEL2:'),'~')"/>
  <xsl:key name="BOTTOMLEVEL"
           match="autnresponse/responsedata/autn:hit/autn:content/DOCUMENT/HIERARCHY"
           use="substring-before(substring-after(.,'LEVEL3:'),'~')"/>

  <xsl:template match="/">
    <TREE>
      <!-- First HIERARCHY of every distinct LEVEL1 value. -->
      <xsl:for-each select="autnresponse/responsedata/autn:hit/autn:content/DOCUMENT/HIERARCHY[generate-id() = generate-id(key('TOPLEVEL', substring-before(substring-after(.,'LEVEL1:'),'#'))[1])]">
        <!-- The value after '#' is a sort number; compare it as a number so that 10 sorts after 4. -->
        <xsl:sort select="substring-after(substring-before(substring-after(.,'LEVEL1:'),'~'),'#')"
                  data-type="number"/>

        <xsl:variable name="TOP" select="substring-before(substring-after(.,'LEVEL1:'),'#')"/>
        <LEVEL1 name="{substring-after($TOP,'+')}" id="{substring-before($TOP,'+')}">

          <!-- Within this LEVEL1 group: first HIERARCHY of every distinct LEVEL2 value. -->
          <xsl:for-each select="key('TOPLEVEL', $TOP)[generate-id() = generate-id(key('MIDLEVEL', substring-before(substring-after(.,'LEVEL2:'),'~'))[1])]">
            <xsl:sort select="substring-after(substring-before(substring-after(.,'LEVEL2:'),'~'),'+')"/>

            <xsl:variable name="MID" select="substring-before(substring-after(.,'LEVEL2:'),'~')"/>
            <LEVEL2 name="{substring-after($MID,'+')}" id="{substring-before($MID,'+')}">

              <!-- Within this LEVEL2 group: first HIERARCHY of every distinct LEVEL3 value. -->
              <xsl:for-each select="key('MIDLEVEL', $MID)[substring-before(substring-after(.,'LEVEL1:'),'#') = $TOP and generate-id() = generate-id(key('BOTTOMLEVEL', substring-before(substring-after(.,'LEVEL3:'),'~'))[1])]">
                <xsl:sort select="substring-after(substring-before(substring-after(.,'LEVEL3:'),'~'),'+')"/>

                <xsl:variable name="BOTTOM" select="substring-before(substring-after(.,'LEVEL3:'),'~')"/>
                <LEVEL3 name="{substring-after($BOTTOM,'+')}" id="{substring-before($BOTTOM,'+')}">
                  <!-- One ITEM per HIERARCHY that sits on exactly this LEVEL1/LEVEL2/LEVEL3 path. -->
                  <xsl:apply-templates select="key('BOTTOMLEVEL', $BOTTOM)[substring-before(substring-after(.,'LEVEL1:'),'#') = $TOP and substring-before(substring-after(.,'LEVEL2:'),'~') = $MID]">
                    <xsl:sort select="../DRETITLE"/>
                  </xsl:apply-templates>
                </LEVEL3>
              </xsl:for-each>
            </LEVEL2>
          </xsl:for-each>
        </LEVEL1>
      </xsl:for-each>
    </TREE>
  </xsl:template>

  <!-- Emits the ITEM for the DOCUMENT that owns the matched HIERARCHY. -->
  <xsl:template match="HIERARCHY">
    <ITEM id="{../ID}" name="{../DRETITLE}"/>
  </xsl:template>

</xsl:stylesheet>

这个样式表的性能并不好,我觉得它不是最好的解决方案。我想知道是否有运行得更快的替代方案。基本上,我有超过1200个DOCUMENT元素,而XSL转换大约需要15秒。

1 个答案:

答案 0 :(得分:0)

此样式表先把文档预处理成输出格式(此时不做任何分组),然后再进行第二遍处理来完成分组。这样应该能减少所需的字符串处理量,并可能带来性能提升。不过,在没有完整样本、也不了解具体实现细节的情况下,很难准确测试性能提升的幅度。

<?xml version="1.0" encoding="utf-8"?>
<!--
  Two-pass variant of the grouping transform.

  Pass 1 (mode "preprocess") parses every HIERARCHY string once and emits a
  flat list of LEVEL1/LEVEL2/LEVEL3/ITEM chains into a result tree fragment.
  Pass 2 groups that fragment with Muenchian keys on the already-parsed
  attributes, so no string parsing is repeated while grouping.

  Requires the EXSLT exsl:node-set() extension function.

  LEVEL1 groups are ordered numerically by the value after '#' (carried in a
  temporary "sort" attribute that is not copied to the output), LEVEL2 and
  LEVEL3 groups by name, and ITEMs by name.
-->
<xsl:stylesheet
  version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:exsl="http://exslt.org/common"
  xmlns:autn="http://schemas.autonomy.com/aci/"
  exclude-result-prefixes="exsl autn">

  <xsl:output method="xml" omit-xml-declaration="yes"/>

  <xsl:strip-space elements="*"/>

  <!--
    Keys over the preprocessed fragment. The parts of a composite key are
    joined with '~', the delimiter of the source HIERARCHY string, so that
    two different name combinations cannot concatenate to the same value.
  -->
  <xsl:key name="TOPLEVEL" match="LEVEL1" use="@name"/>
  <xsl:key name="MIDLEVEL" match="LEVEL2" use="concat(@name,'~',../@name)"/>
  <xsl:key name="BOTLEVEL" match="LEVEL3" use="concat(@name,'~',../@name,'~',../../@name)"/>

  <xsl:template match="/">
    <xsl:variable name="preprocess">
      <xsl:apply-templates mode="preprocess"/>
    </xsl:variable>

    <TREE>
      <!-- First LEVEL1 of every distinct name. -->
      <xsl:for-each
        select="exsl:node-set($preprocess)/LEVEL1[generate-id() = generate-id(key('TOPLEVEL', @name)[1])]">
        <xsl:sort select="@sort" data-type="number"/>
        <xsl:variable name="top" select="@name"/>
        <xsl:copy>
          <xsl:copy-of select="@name|@id"/>

          <!-- Only LEVEL2 children of this LEVEL1 group are inspected. -->
          <xsl:for-each
            select="key('TOPLEVEL', $top)/LEVEL2[generate-id() = generate-id(key('MIDLEVEL', concat(@name,'~',$top))[1])]">
            <xsl:sort select="@name"/>
            <xsl:variable name="mid" select="@name"/>
            <xsl:copy>
              <xsl:copy-of select="@name|@id"/>

              <!-- Only LEVEL3 children of this LEVEL2 group are inspected. -->
              <xsl:for-each
                select="key('MIDLEVEL', concat($mid,'~',$top))/LEVEL3[generate-id() = generate-id(key('BOTLEVEL', concat(@name,'~',$mid,'~',$top))[1])]">
                <xsl:sort select="@name"/>
                <xsl:copy>
                  <xsl:copy-of select="@name|@id"/>
                  <!-- Collect the ITEM of every member of the group, not just of the first LEVEL3. -->
                  <xsl:for-each select="key('BOTLEVEL', concat(@name,'~',$mid,'~',$top))/ITEM">
                    <xsl:sort select="@name"/>
                    <xsl:copy-of select="."/>
                  </xsl:for-each>
                </xsl:copy>
              </xsl:for-each>
            </xsl:copy>
          </xsl:for-each>
        </xsl:copy>
      </xsl:for-each>
    </TREE>
  </xsl:template>

  <!-- Pass 1: walk the whole input but emit nothing except for HIERARCHY. -->
  <xsl:template mode="preprocess" match="@*|node()">
    <xsl:apply-templates mode="preprocess" select="@*|node()"/>
  </xsl:template>

  <!-- Pass 1: turn one HIERARCHY string into a LEVEL1/LEVEL2/LEVEL3/ITEM chain. -->
  <xsl:template mode="preprocess" match="HIERARCHY">
    <xsl:variable name="TOP" select="substring-before(substring-after(.,'LEVEL1:'),'#')"/>
    <xsl:variable name="MID" select="substring-before(substring-after(.,'LEVEL2:'),'~')"/>
    <xsl:variable name="BOT" select="substring-before(substring-after(.,'LEVEL3:'),'~')"/>

    <!-- "sort" keeps the number after '#' for ordering in pass 2. -->
    <LEVEL1
      name="{substring-after($TOP,'+')}"
      id="{substring-before($TOP,'+')}"
      sort="{substring-after(substring-before(substring-after(.,'LEVEL1:'),'~'),'#')}">
      <LEVEL2
        name="{substring-after($MID,'+')}"
        id="{substring-before($MID,'+')}">
        <LEVEL3
          name="{substring-after($BOT,'+')}"
          id="{substring-before($BOT,'+')}">
          <ITEM
            id="{../ID}"
            name="{../DRETITLE}"/>
        </LEVEL3>
      </LEVEL2>
    </LEVEL1>
  </xsl:template>

</xsl:stylesheet>

我做了一些不太严谨的测试:使用您的样本数据,在Saxon 6.5.5中性能实际上有所下降,而在Saxon 9.4.0.3中性能提升了约25%。