使用唯一(主键)字段对XML数据进行分组

时间:2014-03-25 11:26:31

标签: xml ms-access xslt grouping

我是新手,在对XML数据进行分组时遇到了问题。我的数据如下(文件的第一部分是500个病例(患者)的数据,文件末尾部分是诊断和手术数据):

<?xml version="1.0" encoding="UTF-8"?>
    <rootCases>
    <caselist>
        <row>
             <CaseID>2736</CaseID>
             <!-- 500 cases and their information -->
        </row>
    </caselist>

    <!-- Diagnoses and operations start here -->

    <rootDiagnoses>
        <row>
            <CaseID>2736</CaseID>
            <DiagnoseID>15569</DiagnoseID>
            <AIS>854471.2</AIS>
            <Description>Fibula fracture, shaft on left</Description>
            <Main_Diagnose>false</Main_Diagnose>
            <OperationsID/>
            <SurgeryDate/>
            <Main_surgery/>
            <Operation_text/>
        </row>

        <row>
            <CaseID>2736</CaseID>
            <DiagnoseID>15570</DiagnoseID>
            <AIS>854251.2</AIS>
            <Description>Tibia fracture shaft on left</Description>
            <Main_Diagnose>false</Main_Diagnose>
            <OperationsID>262</OperationsID>
            <SurgeryDate>2012-03-13</SurgeryDate>
            <Main_surgery>false</Main_surgery>
            <Operation_text/>
        </row>
        <row>
            <CaseID>2736</CaseID>
            <DiagnoseID>15570</DiagnoseID>
            <AIS>854251.2</AIS>
            <Description>Tibia fracture shaft on left</Description>
            <Main_Diagnose>false</Main_Diagnose>
            <OperationsID>261</OperationsID>
            <SurgeryDate>2012-03-13</SurgeryDate>
            <Main_surgery>true</Main_surgery>
            <Operation_text>Reductio et laminofixatio Reductio et laminofixatio</Operation_text>
        </row>
    </rootDiagnoses>
</rootCases>

因此,一个病例可以有0 ... n个诊断,一个诊断可以有0 ... n次操作(即手术)。其中一项手术应被标记为主要手术。

我的代码如下:

<!-- Diagnosis list for the current case.
     The context node must have a CaseID child; the enclosing template is not
     shown here (presumably it matches a caselist/row element; confirm). -->
<xsl:variable name="caseident" select="CaseID" />
  <xsl:choose>
    <!-- True when at least one rootDiagnoses row of this case carries a
         non-empty AIS code. -->
    <xsl:when test="/rootCases/rootDiagnoses/row/CaseID[text()=$caseident]/../AIS !=''">
      <diagnosisList>
        <!-- Iterates over every rootDiagnoses row of this case. -->
        <xsl:for-each select="/rootCases/rootDiagnoses/row[CaseID=$caseident]">
        <!-- KNOWN PROBLEM: one <diagnosis> is written per row, not per
             DiagnoseID, so a diagnosis that has several operation rows is
             emitted several times. -->
           <xsl:if test="AIS !=''"><diagnosis>
              <ais2005Code><xsl:value-of select="AIS"/></ais2005Code>
              <descriptionDiagnosis><xsl:value-of select="Description"/></descriptionDiagnosis>
              <!-- Surgery details are written only when this row has a
                   surgery date and is flagged as the main surgery. -->
              <xsl:if test="SurgeryDate !='' and Main_surgery ='true'"><surgery1>
                  <date><xsl:value-of select="SurgeryDate"/></date>
                  <description><xsl:value-of select="Operation_text"/></description>
               </surgery1></xsl:if>
            </diagnosis></xsl:if>
         </xsl:for-each>
       </diagnosisList>
   </xsl:when>
  <!-- No row of this case has an AIS code: emit an empty marker element. -->
  <xsl:otherwise><noDiagnosisList/></xsl:otherwise>
  </xsl:choose>

我希望得到的结果将是所有诊断的列表(每个唯一诊断仅列出一次):

<!--     Diagnoses List Element of patient 2736-->
<diagnosisList>
        <diagnosis>
           <ais2005Code>854471.2</ais2005Code>
           <descriptionDiagnosis>Fibula fracture, shaft on left</descriptionDiagnosis>
        </diagnosis>
        <diagnosis>
           <ais2005Code>854251.2</ais2005Code>
           <descriptionDiagnosis>Tibia fracture shaft on left</descriptionDiagnosis>
           <surgery1>
              <date>2012-03-13</date>
              <description>Reductio et laminofixatio Reductio et laminofixatio</description>
           </surgery1>
         </diagnosis>
</diagnosisList>

DiagnoseID是我原始数据库中的主键字段,可用于对诊断进行分组,但我不知道该怎么做。我需要的是所有诊断(用所谓的AIS代码编码);如果某个诊断有一项或多项相关的操作(即手术),我只需要主要手术的信息(次要的、不那么重要的手术不显示)。目前的情况是:如果一个诊断关联了多项操作,同一个诊断就会被输出多次。

我研究过Muenchian分组,但我认为这里并不需要它,因为我有一个可以标识每个诊断的唯一键字段。还是说其实需要?抱歉这个问题很长(也许还有些含糊),但我确实是新手,不知道该如何继续。感谢所有帮助!

2 个答案:

答案 0 :(得分:0)

以下样式表:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Writes one <diagnosisList> per case found in rootCases/caselist.
    Each list holds exactly one <diagnosis> per distinct DiagnoseID of that
    case; if any row of the diagnosis is flagged Main_surgery = 'true', the
    first such row supplies the <surgery1> details.

    Grouping is done with keys (Muenchian method) instead of scanning the
    sibling rows, so a diagnosis that has several operation rows but no main
    surgery is still listed once, and a diagnosis is never listed twice.

    NOTE: with more than one case this produces several top-level
    <diagnosisList> elements; wrap the outer xsl:for-each in a single
    container element if a well-formed document is required.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    version="1.0">
    <xsl:output indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- All diagnosis/operation rows of one case. -->
    <xsl:key name="rows-by-case" match="rootDiagnoses/row" use="CaseID"/>
    <!-- All rows of one diagnosis; CaseID is part of the key so that groups
         can never span two cases. -->
    <xsl:key name="rows-by-diagnosis" match="rootDiagnoses/row"
        use="concat(CaseID, '|', DiagnoseID)"/>

    <xsl:template match="/">
        <xsl:for-each select="rootCases/caselist/row/CaseID">
            <xsl:variable name="curr_ID" select="."/>
            <diagnosisList id="{$curr_ID}">
                <!-- Keep only the first row (in document order) of every
                     diagnosis group of the current case. -->
                <xsl:for-each select="key('rows-by-case', $curr_ID)
                                      [generate-id() = generate-id(key('rows-by-diagnosis', concat(CaseID, '|', DiagnoseID))[1])]">
                    <!-- First row of this diagnosis flagged as main surgery;
                         empty node-set when there is none. -->
                    <xsl:variable name="main_surgery"
                        select="key('rows-by-diagnosis', concat(CaseID, '|', DiagnoseID))[Main_surgery = 'true'][1]"/>
                    <diagnosis>
                        <ais2005Code><xsl:value-of select="AIS"/></ais2005Code>
                        <description><xsl:value-of select="Description"/></description>
                        <xsl:if test="$main_surgery">
                            <surgery1>
                                <date><xsl:value-of select="$main_surgery/SurgeryDate"/></date>
                                <description><xsl:value-of select="$main_surgery/Operation_text"/></description>
                            </surgery1>
                        </xsl:if>
                    </diagnosis>
                </xsl:for-each>
            </diagnosisList>
        </xsl:for-each>
    </xsl:template>
</xsl:stylesheet>

当应用于上面的输入XML时,结果为:

<?xml version="1.0" encoding="utf-8"?>
<diagnosisList id="2736">
   <diagnosis>
      <ais2005Code>854471.2</ais2005Code>
      <description>Fibula fracture, shaft on left</description>
   </diagnosis>
   <diagnosis>
      <ais2005Code>854251.2</ais2005Code>
      <description>Tibia fracture shaft on left</description>
      <surgery1>
         <date>2012-03-13</date>
         <description>Reductio et laminofixatio Reductio et laminofixatio</description>
      </surgery1>
   </diagnosis>
</diagnosisList>

答案 1 :(得分:0)

  

> 我研究了Muenchian分组,但我不认为这在这里是必要的

嗯,你确实需要某种手段来获得不重复的诊断 - Muenchian分组或其某种变体。以下是它如何工作的快速演示:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Minimal demonstration of Muenchian grouping: for every case in the case
    list, write one empty <diagnosis> element per distinct DiagnoseID.
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

<!-- Diagnosis rows indexed by owning case and by diagnosis id. -->
<xsl:key name="diag-by-case" match="rootDiagnoses/row" use="CaseID" />
<xsl:key name="diag-by-diag" match="rootDiagnoses/row" use="DiagnoseID" />

<!-- Entry point: wrap all cases in a single <output> element. -->
<xsl:template match="/">
    <output>
        <xsl:apply-templates select="rootCases/caselist/row" mode="case"/>
    </output>
</xsl:template>

<!-- One <case> per case-list row. Only rows that are the first member of
     their DiagnoseID group are passed on, i.e. one row per unique diagnosis. -->
<xsl:template match="row" mode="case">
    <case id="{CaseID}">
        <xsl:apply-templates mode="diagnosis"
            select="key('diag-by-case', CaseID)[count(. | key('diag-by-diag', DiagnoseID)[1]) = 1]"/>
    </case>
</xsl:template>

<!-- Representative row of a diagnosis group. -->
<xsl:template match="row" mode="diagnosis">
    <diagnosis id="{DiagnoseID}"/>
</xsl:template>

</xsl:stylesheet>

应用于您的输入,获得以下结果:

<?xml version="1.0" encoding="UTF-8"?>
<output>
  <case id="2736">
    <diagnosis id="15569"/>
    <diagnosis id="15570"/>
  </case>
</output>

或者,如果您的处理器支持,您可以使用EXSLT的set:distinct()函数。


编辑:

这是一个更接近您预期结果的扩展版本:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Extended Muenchian-grouping example: for every case, write one
    <diagnosis> per distinct DiagnoseID with its AIS code and description,
    followed by a <surgery1> element for each row of that diagnosis that is
    flagged Main_surgery = 'true'.
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

<!-- Diagnosis rows indexed by owning case and by diagnosis id. -->
<xsl:key name="diag-by-case" match="rootDiagnoses/row" use="CaseID" />
<xsl:key name="diag-by-diag" match="rootDiagnoses/row" use="DiagnoseID" />

<xsl:template match="/">
    <output>
        <xsl:for-each select="rootCases/caselist/row">
        <case id="{CaseID}">
            <!-- for each unique diagnosis: keep a row only if it is the first
                 member of its DiagnoseID group -->
            <xsl:for-each select="key('diag-by-case', CaseID)[count(. | key('diag-by-diag', DiagnoseID)[1]) = 1]">
                <diagnosis id="{DiagnoseID}">
                    <ais2005Code><xsl:value-of select="AIS"/></ais2005Code>
                    <descriptionDiagnosis><xsl:value-of select="Description"/></descriptionDiagnosis>
                    <!-- get surgery: look through all rows of this diagnosis,
                         not just the representative one -->
                    <xsl:for-each select="key('diag-by-diag', DiagnoseID)[Main_surgery='true']">
                        <surgery1>
                            <date><xsl:value-of select="SurgeryDate"/></date>
                            <description><xsl:value-of select="Operation_text"/></description>
                        </surgery1>
                    </xsl:for-each>
                </diagnosis>
            </xsl:for-each>
        </case> 
        </xsl:for-each>
    </output>
</xsl:template>

</xsl:stylesheet>

结果:

<?xml version="1.0" encoding="UTF-8"?>
<output>
   <case id="2736">
      <diagnosis id="15569">
         <ais2005Code>854471.2</ais2005Code>
         <descriptionDiagnosis>Fibula fracture, shaft on left</descriptionDiagnosis>
      </diagnosis>
      <diagnosis id="15570">
         <ais2005Code>854251.2</ais2005Code>
         <descriptionDiagnosis>Tibia fracture shaft on left</descriptionDiagnosis>
         <surgery1>
            <date>2012-03-13</date>
            <description>Reductio et laminofixatio Reductio et laminofixatio</description>
         </surgery1>
      </diagnosis>
   </case>
</output>