如何优化此嵌套分组?

时间:2015-09-25 13:48:43

标签: xslt-1.0 muenchian-grouping

我有一个XML,如下例所示。实际文件最多可包含5000行,为了便于说明,这里只保留了20行。

<PMT NM="rnt-model">
<PV V="L11-L23-L3448-L42375_MODEL1" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL2" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL3" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL4" C="1"></PV>
<PV V="L11-L23-L3448-L448_MODEL5" C="2"></PV>
<PV V="L11-L24-L319-L493_MODEL6" C="1"></PV>
<PV V="L11-L25-L3288-L41931_MODEL7" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL8" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL9" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL10" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL11" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL12" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL13" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL14" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL15" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL16" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL17" C="1"></PV>
<PV V="L110-L254-L3217-L41303_MODEL18" C="1"></PV>
<PV V="L110-L254-L3218-L41307_MODEL19" C="1"></PV>
<PV V="L110-L254-L3218-L41307_MODEL20" C="1"></PV>
</PMT>

必须将XML从这种伪平面格式转换为基于@V的树结构:需要先按下划线将@V拆分为两部分,然后按连字符对第一部分进行分词(tokenize)。

或者为了使其更加明显,下面是预期的结果。

<root>
<n id="L11">
    <n id="L23">
        <n id="L3448">
            <n id="L42375">
                <n m="MODEL1" c="1"></n>
            </n>
            <n id="L448">
                <n m="MODEL2" c="1"></n>
                <n m="MODEL3" c="1"></n>
                <n m="MODEL4" c="1"></n>
                <n m="MODEL5" c="2"></n>
            </n>
        </n>
    </n>
    <!-- rest of rows below -->

我用下面的XSLT实现了这一转换,行数较少时它运行正常。但是,处理真实的XML时,生成树需要花费大量时间,所以我想知道如何提高效率。在XSLT 2.0中这很简单,但我目前的项目只能使用1.0。

使用的XSLT代码(可以工作,但效率不高)

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<!-- Lookup tables over the temporary <row> elements built by the PMT template:
     each key finds rows by the value of one of their L1, L2, L3 or L4 attributes. -->
<xsl:key name="level1" match="row" use="@L1"/>
<xsl:key name="level2" match="row" use="@L2"/>
<xsl:key name="level3" match="row" use="@L3"/>
<xsl:key name="level4" match="row" use="@L4"/>
<!--
  Main template: turns the flat PV list of the PMT element whose NM attribute is
  'rnt-model' into a four-level tree of <n> elements wrapped in <root>.

  Step 1 copies every PV into a temporary <row>; step 2 groups those rows level
  by level using the level1, level2, level3 and level4 keys.

  NOTE(review): $theTree is a result tree fragment. XSLT 1.0 does not allow path
  steps on it without a node-set() extension function, so this presumably relies
  on a lenient processor. Confirm for the target processor.
  NOTE(review): the level2, level3 and level4 keys are not qualified by the parent
  level, so the grouping looks correct only when an id never repeats under a
  different parent. Confirm against real data.
-->
<xsl:template match="//PMT[@NM='rnt-model']">
    <root>
        <!-- Step 1: build a temporary tree with one <row> per PV. tokenize_tree adds one
             attribute per hyphen-separated token of the part of @V before the first '_'
             (attribute name = first two characters of the token). M receives the part of
             @V after that '_' and C is copied from the PV. -->
        <xsl:variable name="theTree">
            <xsl:for-each select="PV">
                <row>
                    <!-- Iterating over @V makes the attribute the context node, so '.' below is its value. -->
                    <xsl:for-each select="@V">
                        <xsl:call-template name="tokenize_tree">
                            <xsl:with-param name="list" select="substring-before(.,'_')"/>
                            <xsl:with-param name="delimiter" select="'-'"/>
                        </xsl:call-template>
                        <xsl:attribute name="M"><xsl:value-of select="substring-after(.,'_')"/></xsl:attribute>
                    </xsl:for-each>
                    <xsl:attribute name="C"><xsl:value-of select="@C"/></xsl:attribute>
                </row>
            </xsl:for-each>
        </xsl:variable>

        <!-- Step 2: nested grouping. At each level a row is kept only when it is the first
             row returned by that level's key for its own value. -->

        <!-- Level 1: one <n> per distinct @L1. -->
        <xsl:for-each select="$theTree//row[generate-id()=generate-id(key('level1',@L1)[1])]">
            <xsl:variable name="theType" select="@L1"/>
            <n id="{$theType}">
                <!-- Level 2: filters every row of the temporary tree by @L1, then keeps the first row per @L2. -->
                <xsl:for-each select="$theTree/row[@L1=$theType][generate-id()=generate-id(key('level2',@L2)[1])]">
                    <xsl:variable name="theCat" select="@L2"/>
                    <n id="{$theCat}">
                        <!-- Level 3: same pattern, filtering all rows by @L2 and de-duplicating on @L3. -->
                        <xsl:for-each select="$theTree/row[@L2=$theCat][generate-id()=generate-id(key('level3',@L3)[1])]">
                            <xsl:variable name="theSubCat" select="@L3"/>
                            <n id="{$theSubCat}">
                                <!-- Level 4: filters all rows by @L3 and de-duplicates on @L4. -->
                                <xsl:for-each select="$theTree/row[@L3=$theSubCat][generate-id()=generate-id(key('level4',@L4)[1])]">
                                    <xsl:variable name="theSerie" select="@L4"/>
                                    <n id="{$theSerie}">
                                        <!-- Leaves: every row with this @L4, emitted with its M and C values. -->
                                        <xsl:for-each select="$theTree/row[@L4=$theSerie]">
                                            <n m="{@M}" c="{@C}"/>
                                        </xsl:for-each>
                                    </n>
                                </xsl:for-each>
                            </n>
                        </xsl:for-each>
                    </n>
                </xsl:for-each>
            </n>
        </xsl:for-each>
    </root>
</xsl:template>


<!--
  tokenize_tree: recursively splits $list on $delimiter and writes one attribute
  per token onto the element currently being constructed.
  The attribute name is the first two characters of the token and the attribute
  value is the whole token. An empty final token produces nothing.

  Parameters:
    list      - the delimited string still to be processed
    delimiter - the separator string
-->
<xsl:template name="tokenize_tree">
    <xsl:param name="list"/>
    <xsl:param name="delimiter"/>
    <xsl:choose>
        <!-- More tokens follow: emit the leading token, then recurse on the remainder. -->
        <xsl:when test="contains($list, $delimiter)">
            <xsl:variable name="head" select="substring-before($list,$delimiter)"/>
            <xsl:attribute name="{substring($head,1,2)}"><xsl:value-of select="$head"/></xsl:attribute>
            <xsl:call-template name="tokenize_tree">
                <xsl:with-param name="delimiter" select="$delimiter"/>
                <xsl:with-param name="list" select="substring-after($list,$delimiter)"/>
            </xsl:call-template>
        </xsl:when>
        <!-- Last token: emit it unless nothing is left. -->
        <xsl:when test="not($list = '')">
            <xsl:attribute name="{substring($list,1,2)}"><xsl:value-of select="$list"/></xsl:attribute>
        </xsl:when>
    </xsl:choose>
</xsl:template>
</xsl:stylesheet>

关于如何提高效率、以便更快地处理更大的文件,有什么想法吗?

1 个答案:

答案 0 :(得分:1)

效率在很大程度上取决于处理器,不过也许你可以尝试单遍(single-pass)处理的方法:

XSLT 1.0

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Single-pass nested Muenchian grouping of the PV children of the root PMT element.

  Each @V is expected to look like  L1-L2-L3-L4_MODEL : four hyphen-separated ids,
  then an underscore, then the model name. The output is
  <root> / <n id=L1> / <n id=L2> / <n id=L3> / <n id=L4> / <n m=MODEL c=@C/>.

  Every key indexes a PV by its path from level 1 down to the key's own level,
  not by the id of that level alone. With single-id keys, an id that occurs under
  two different parents would be merged into the first parent and would be missing
  from the second. When ids are unique per level the output is unchanged.
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:strip-space elements="*"/>

<!-- level1: L1 -->
<xsl:key name="level1" match="PV" use="substring-before(@V, '-')"/>
<!-- level2: L1-L2 -->
<xsl:key name="level2" match="PV"
         use="concat(substring-before(@V, '-'), '-',
                     substring-before(substring-after(@V, '-'), '-'))"/>
<!-- level3: L1-L2-L3 -->
<xsl:key name="level3" match="PV"
         use="concat(substring-before(@V, '-'), '-',
                     substring-before(substring-after(@V, '-'), '-'), '-',
                     substring-before(substring-after(substring-after(@V, '-'), '-'), '-'))"/>
<!-- level4: L1-L2-L3-L4, i.e. everything before the underscore -->
<xsl:key name="level4" match="PV" use="substring-before(@V, '_')"/>

<xsl:template match="/PMT">
    <root>
        <!-- Level 1: the first PV of every distinct L1. -->
        <xsl:for-each select="PV[count(. | key('level1', substring-before(@V, '-'))[1]) = 1]">
            <xsl:variable name="L1" select="substring-before(@V, '-')"/>
            <n id="{$L1}">
                <!-- Level 2: within this L1 group, the first PV of every distinct L1-L2 path. -->
                <xsl:for-each select="key('level1', $L1)
                                      [count(. | key('level2',
                                                     concat($L1, '-',
                                                            substring-before(substring-after(@V, '-'), '-')))[1]) = 1]">
                    <xsl:variable name="L2" select="substring-before(substring-after(@V, '-'), '-')"/>
                    <xsl:variable name="path2" select="concat($L1, '-', $L2)"/>
                    <n id="{$L2}">
                        <!-- Level 3: within this L1-L2 group, the first PV of every distinct L1-L2-L3 path. -->
                        <xsl:for-each select="key('level2', $path2)
                                              [count(. | key('level3',
                                                             concat($path2, '-',
                                                                    substring-before(substring-after(substring-after(@V, '-'), '-'), '-')))[1]) = 1]">
                            <xsl:variable name="L3" select="substring-before(substring-after(substring-after(@V, '-'), '-'), '-')"/>
                            <xsl:variable name="path3" select="concat($path2, '-', $L3)"/>
                            <n id="{$L3}">
                                <!-- Level 4: within this L1-L2-L3 group, the first PV of every distinct full path. -->
                                <xsl:for-each select="key('level3', $path3)
                                                      [count(. | key('level4', substring-before(@V, '_'))[1]) = 1]">
                                    <xsl:variable name="L4" select="substring-before(substring-after(substring-after(substring-after(@V, '-'), '-'), '-'), '_')"/>
                                    <xsl:variable name="path4" select="substring-before(@V, '_')"/>
                                    <n id="{$L4}">
                                        <!-- Leaves: every PV sharing the full path; the model name is the text after the underscore. -->
                                        <xsl:for-each select="key('level4', $path4)">
                                            <n m="{substring-after(@V, '_')}" c="{@C}"/>
                                        </xsl:for-each>
                                    </n>
                                </xsl:for-each>
                            </n>
                        </xsl:for-each>
                    </n>
                </xsl:for-each>
            </n>
        </xsl:for-each>
    </root>
</xsl:template>

</xsl:stylesheet>

另请注意,显式路径通常比 // 更高效。