从多个重叠元素中提取唯一文本

时间:2012-04-08 20:08:29

标签: xml xslt xpath

如何在以下XML中为每个SectionLabels提取唯一FormDef

<XML>
<FormDef OID="F_TEST_1" Name="Test Form 1">
    <ItemGroupRef ItemGroupOID="TEST_GROUP_1" />
    <ItemGroupRef ItemGroupOID="TEST_GROUP_4" />
</FormDef>
<FormDef OID="F_TEST_2" Name="Test Form 2">
    <ItemGroupRef ItemGroupOID="TEST_GROUP_2" />
</FormDef>
<FormDef OID="F_TEST_3" Name="Test Form 3">
    <ItemGroupRef ItemGroupOID="TEST_GROUP_2"/>
    <ItemGroupRef ItemGroupOID="TEST_GROUP_3"/>
</FormDef>
<FormDef OID="F_TEST_4" Name="Test Form 4">
    <ItemGroupRef ItemGroupOID="TEST_GROUP_4"/>
</FormDef>

<ItemGroupDef OID="TEST_GROUP_1" Name="Ungrouped">
    <ItemRef ItemOID="I_MSA1_INIT" />
    <ItemRef ItemOID="I_MSA1_FORMD" />
    <ItemRef ItemOID="I_MSA1_MSA1_CONS" />
    <ItemRef ItemOID="I_MSA1_MSA1_PGT" />
</ItemGroupDef>
<ItemGroupDef OID="TEST_GROUP_2" Name="MSA1_complyreasG" >
    <ItemRef ItemOID="I_MSA1_MSA1_NOELIGREAS" />
    <ItemRef ItemOID="I_MSA1_MSA1_COMPLYREAS" />
</ItemGroupDef>
<ItemGroupDef OID="TEST_GROUP_3" Name="Ungrouped">
    <ItemRef ItemOID="I_MSA2_INIT" />
    <ItemRef ItemOID="I_MSA2_FROMD" />
    <ItemRef ItemOID="I_MSA2_IDV" />
    <ItemRef ItemOID="I_MSA2_MSA2_INITBF" />
</ItemGroupDef>
<ItemGroupDef OID="TEST_GROUP_4" Name="MSA2_POARTprecG">
    <ItemRef ItemOID="I_MSA2_MSA2_POARTPREC" />
    <ItemRef ItemOID="I_MSA2_MSA2_POARTNBV" />
    <ItemRef ItemOID="I_MSA2_MSA2_LOARTPREC" />
</ItemGroupDef>

<ItemDef OID="I_MSA1_INIT">
    <ItemDetails ItemOID="I_MSA1_INIT">
        <ItemPresentInForm FormOID="F_TEST_1">
            <SectionLabel>Section1</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA1_FORMD">
    <ItemDetails ItemOID="I_MSA1_FORMD">
        <ItemPresentInForm FormOID="F_TEST_1">
            <SectionLabel>Section2</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA1_MSA1_CONS">
    <ItemDetails ItemOID="I_MSA1_MSA1_CONS">
        <ItemPresentInForm FormOID="F_TEST_1">
            <SectionLabel>Section3</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA1_MSA1_NOELIGREAS">
    <ItemDetails ItemOID="I_MSA1_MSA1_NOELIGREAS">
        <ItemPresentInForm FormOID="F_TEST_2">
            <SectionLabel>Section1</SectionLabel>
        </ItemPresentInForm>
        <ItemPresentInForm FormOID="F_TEST_3">
            <SectionLabel>Section1</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA1_MSA1_COMPLYREAS">
    <ItemDetails ItemOID="I_MSA1_MSA1_COMPLYREAS">
        <ItemPresentInForm FormOID="F_TEST_2">
            <SectionLabel>Section2</SectionLabel>
        </ItemPresentInForm>
        <ItemPresentInForm FormOID="F_TEST_3">
            <SectionLabel>Section2</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA2_INIT">
    <ItemDetails ItemOID="I_MSA2_INIT">
        <ItemPresentInForm FormOID="F_TEST_3">
            <SectionLabel>Section1</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA2_FROMD">
    <ItemDetails ItemOID="I_MSA2_FROMD">
        <ItemPresentInForm FormOID="F_TEST_3">
            <SectionLabel>Section2</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA2_IDV">
    <ItemDetails ItemOID="I_MSA2_IDV">
        <ItemPresentInForm FormOID="F_TEST_3">
            <SectionLabel>Section3</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA2_MSA2_POARTPREC">
    <ItemDetails ItemOID="I_MSA2_MSA2_POARTPREC">
        <ItemPresentInForm FormOID="F_TEST_1">
            <SectionLabel>Section1</SectionLabel>
        </ItemPresentInForm>
        <ItemPresentInForm FormOID="F_TEST_4">
            <SectionLabel>Section1</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA2_MSA2_POARTNBV">
    <ItemDetails ItemOID="I_MSA2_MSA2_POARTNBV">
        <ItemPresentInForm FormOID="F_TEST_1">
            <SectionLabel>Section2</SectionLabel>
        </ItemPresentInForm>
        <ItemPresentInForm FormOID="F_TEST_4">
            <SectionLabel>Section2</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
<ItemDef OID="I_MSA2_MSA2_LOARTPREC">
    <ItemDetails ItemOID="I_MSA2_MSA2_LOARTPREC">
        <ItemPresentInForm FormOID="F_TEST_1">
            <SectionLabel>Section3</SectionLabel>
        </ItemPresentInForm>
        <ItemPresentInForm FormOID="F_TEST_4">
            <SectionLabel>Section3</SectionLabel>
        </ItemPresentInForm>
    </ItemDetails>
</ItemDef>
</XML>

首先我定义一个键:

<xsl:key name="labels" match="ItemDef/ItemDetails/ItemPresentInForm" use="@FormOID" />

然后在选择中使用它:

<xsl:variable name="sections" 
                select="//*[local-name()='ItemDef']/*[local-name()='ItemDetails']/*[local-name()='ItemPresentInForm']
                           [generate-id() = generate-id(key('labels', @FormOID))]">

                <xsl:value-of select="./SectionLabel" />

            </xsl:variable>

但是这会回来:

SECTION1
SECTION1
SECTION1
SECTION1

将键更改为SectionLabel上的音高:

<xsl:key name="labels" match="ItemDef/ItemDetails/ItemPresentInForm" use="SectionLabel" />

要转换的XSLT:

                <xsl:variable name="sections" 
                select="//*[local-name()='ItemDef']/*[local-name()='ItemDetails']/*[local-name()='ItemPresentInForm']
                           [generate-id(.) = generate-id(key('labels', SectionLabel))]">

                <xsl:value-of select="./SectionLabel" />

            </xsl:variable>

返回:

SECTION1
第2节
Section3中
Section4
SECTION1
第2节
Section3中
Section4
SECTION1
第2节
Section3中
Section4
SECTION1
第2节
Section3中
Section4

当我遍历每个FormDef时,文档中的所有节标签都是。我的目标是获取一个输出,其中只提取当前FormDef的节标签。

预期输出应为:

Form OID="F_TEST_1"
Labels: Section1, Section2, Section3 

Form OID="F_TEST_2"
Labels: Section1, Section2 

Form OID="F_TEST_3"
Labels: Section1, Section2, Section3 

Form OID="F_TEST_4"
Labels: Section1, Section2, Section3 

由于

1 个答案:

答案 0 :(得分:0)

此转化

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <!-- The report is plain text, so use the text output method. -->
 <xsl:output method="text"/>

 <!--
   Composite Muenchian key: every SectionLabel is indexed by the FormOID of
   its parent element joined with its own string value. The same label text
   under two different forms therefore yields two different key values.
 -->
 <xsl:key name="kLabelsInForm" match="SectionLabel"
  use="concat(../@FormOID, '+', .)"/>

 <!--
   Override the built-in rule for text nodes. Without this, the built-in
   templates descend into ItemGroupDef and ItemDef and copy every text node
   (all SectionLabel strings plus inter-element whitespace) to the output.
 -->
 <xsl:template match="text()"/>

 <!-- Writes one form header followed by that form's distinct labels. -->
 <xsl:template match="FormDef">
  <xsl:variable name="vOID" select="@OID"/>

  <!-- Line breaks are emitted explicitly so the layout does not depend on
       whitespace that happens to be present in the input document. -->
  <xsl:text>&#10;  Form OID="</xsl:text>
  <xsl:value-of select="$vOID"/>
  <xsl:text>"&#10;  Labels: </xsl:text>

  <!--
    Candidates are the SectionLabel elements recorded for this form. A label
    is kept only when it is the first node in its key group, i.e. the first
    occurrence of that (FormOID, label) pair in document order.
  -->
  <xsl:for-each select=
  "../ItemDef/*/*[@FormOID = $vOID]/SectionLabel
      [generate-id()
      =
       generate-id(key('kLabelsInForm',
                        concat($vOID, '+', .)
                       )
                        [1]
                   )
      ]
  ">
    <xsl:value-of select="concat(., ' ')"/>
  </xsl:for-each>

  <xsl:text>&#10;</xsl:text>
 </xsl:template>
</xsl:stylesheet>

应用于提供的XML文档

<XML>
    <FormDef OID="F_TEST_1" Name="Test Form 1">
        <ItemGroupRef ItemGroupOID="TEST_GROUP_1" />
        <ItemGroupRef ItemGroupOID="TEST_GROUP_4" />
    </FormDef>
    <FormDef OID="F_TEST_2" Name="Test Form 2">
        <ItemGroupRef ItemGroupOID="TEST_GROUP_2" />
    </FormDef>
    <FormDef OID="F_TEST_3" Name="Test Form 3">
        <ItemGroupRef ItemGroupOID="TEST_GROUP_2"/>
        <ItemGroupRef ItemGroupOID="TEST_GROUP_3"/>
    </FormDef>
    <FormDef OID="F_TEST_4" Name="Test Form 4">
        <ItemGroupRef ItemGroupOID="TEST_GROUP_4"/>
    </FormDef>
    <ItemGroupDef OID="TEST_GROUP_1" Name="Ungrouped">
        <ItemRef ItemOID="I_MSA1_INIT" />
        <ItemRef ItemOID="I_MSA1_FORMD" />
        <ItemRef ItemOID="I_MSA1_MSA1_CONS" />
        <ItemRef ItemOID="I_MSA1_MSA1_PGT" />
    </ItemGroupDef>
    <ItemGroupDef OID="TEST_GROUP_2" Name="MSA1_complyreasG" >
        <ItemRef ItemOID="I_MSA1_MSA1_NOELIGREAS" />
        <ItemRef ItemOID="I_MSA1_MSA1_COMPLYREAS" />
    </ItemGroupDef>
    <ItemGroupDef OID="TEST_GROUP_3" Name="Ungrouped">
        <ItemRef ItemOID="I_MSA2_INIT" />
        <ItemRef ItemOID="I_MSA2_FROMD" />
        <ItemRef ItemOID="I_MSA2_IDV" />
        <ItemRef ItemOID="I_MSA2_MSA2_INITBF" />
    </ItemGroupDef>
    <ItemGroupDef OID="TEST_GROUP_4" Name="MSA2_POARTprecG">
        <ItemRef ItemOID="I_MSA2_MSA2_POARTPREC" />
        <ItemRef ItemOID="I_MSA2_MSA2_POARTNBV" />
        <ItemRef ItemOID="I_MSA2_MSA2_LOARTPREC" />
    </ItemGroupDef>
    <ItemDef OID="I_MSA1_INIT">
        <ItemDetails ItemOID="I_MSA1_INIT">
            <ItemPresentInForm FormOID="F_TEST_1">
                <SectionLabel>Section1</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA1_FORMD">
        <ItemDetails ItemOID="I_MSA1_FORMD">
            <ItemPresentInForm FormOID="F_TEST_1">
                <SectionLabel>Section2</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA1_MSA1_CONS">
        <ItemDetails ItemOID="I_MSA1_MSA1_CONS">
            <ItemPresentInForm FormOID="F_TEST_1">
                <SectionLabel>Section3</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA1_MSA1_NOELIGREAS">
        <ItemDetails ItemOID="I_MSA1_MSA1_NOELIGREAS">
            <ItemPresentInForm FormOID="F_TEST_2">
                <SectionLabel>Section1</SectionLabel>
            </ItemPresentInForm>
            <ItemPresentInForm FormOID="F_TEST_3">
                <SectionLabel>Section1</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA1_MSA1_COMPLYREAS">
        <ItemDetails ItemOID="I_MSA1_MSA1_COMPLYREAS">
            <ItemPresentInForm FormOID="F_TEST_2">
                <SectionLabel>Section2</SectionLabel>
            </ItemPresentInForm>
            <ItemPresentInForm FormOID="F_TEST_3">
                <SectionLabel>Section2</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA2_INIT">
        <ItemDetails ItemOID="I_MSA2_INIT">
            <ItemPresentInForm FormOID="F_TEST_3">
                <SectionLabel>Section1</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA2_FROMD">
        <ItemDetails ItemOID="I_MSA2_FROMD">
            <ItemPresentInForm FormOID="F_TEST_3">
                <SectionLabel>Section2</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA2_IDV">
        <ItemDetails ItemOID="I_MSA2_IDV">
            <ItemPresentInForm FormOID="F_TEST_3">
                <SectionLabel>Section3</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA2_MSA2_POARTPREC">
        <ItemDetails ItemOID="I_MSA2_MSA2_POARTPREC">
            <ItemPresentInForm FormOID="F_TEST_1">
                <SectionLabel>Section1</SectionLabel>
            </ItemPresentInForm>
            <ItemPresentInForm FormOID="F_TEST_4">
                <SectionLabel>Section1</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA2_MSA2_POARTNBV">
        <ItemDetails ItemOID="I_MSA2_MSA2_POARTNBV">
            <ItemPresentInForm FormOID="F_TEST_1">
                <SectionLabel>Section2</SectionLabel>
            </ItemPresentInForm>
            <ItemPresentInForm FormOID="F_TEST_4">
                <SectionLabel>Section2</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
    <ItemDef OID="I_MSA2_MSA2_LOARTPREC">
        <ItemDetails ItemOID="I_MSA2_MSA2_LOARTPREC">
            <ItemPresentInForm FormOID="F_TEST_1">
                <SectionLabel>Section3</SectionLabel>
            </ItemPresentInForm>
            <ItemPresentInForm FormOID="F_TEST_4">
                <SectionLabel>Section3</SectionLabel>
            </ItemPresentInForm>
        </ItemDetails>
    </ItemDef>
</XML>

生成所需的正确结果(每个表单的唯一SectionLabel值):

  Form OID="F_TEST_1"
  Labels: Section1 Section2 Section3 

  Form OID="F_TEST_2"
  Labels: Section1 Section2 

  Form OID="F_TEST_3"
  Labels: Section1 Section2 Section3 

  Form OID="F_TEST_4"
  Labels: Section1 Section2 Section3 

解释

  • Muenchian分组方法,其中要分组的元素使用复合键编制索引。

  • 表单中的每个唯一部分值都由SectionLabel字符串值与其父级FormOID属性的字符串值的串联标识。

  • 通过这种方式,不同形式的相同SectionLabel值会产生不同的密钥,并且不会只计算一次。

以下是Muenchian分组的一些很好的资源