选择具有相同节点名称的数据并合并XML文件中的数据

时间:2013-05-14 09:55:47

标签: xml

我正在尝试从 UniProt XML 文件中提取一些数据,大部分想要的内容都已经能取到,但当同一个条目下有多个同名节点时,输出就会出问题。最好能把这些同名节点的值合并到同一个单元格中。

XML代码:

<?xml version='1.0' encoding='UTF-8'?>
<?xml-stylesheet href="test_will7.xslt" type="text/xsl" ?>

<!--
  Trimmed sample in the UniProt XML layout: three entries, each carrying its
  accession numbers, entry name, recommended protein name, gene names, a
  "subcellular location" comment and one or more GO cross-references.
-->
<uniprot>

  <entry dataset="Swiss-Prot" created="1993-04-01" modified="2012-11-28" version="118">
    <accession>P30443</accession>
    <accession>O77964</accession>
    <name>1A01_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-1 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0031901">
      <property type="term" value="C:early endosome membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
    <dbReference type="GO" id="GO:0012507">
      <property type="term" value="C:ER to Golgi transport vesicle membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
    <dbReference type="GO" id="GO:0000139">
      <property type="term" value="C:Golgi membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
  </entry>

  <entry dataset="Swiss-Prot" created="1986-07-21" modified="2012-11-28" version="151">
    <accession>P01892</accession>
    <accession>O19619</accession>
    <accession>P06338</accession>
    <name>1A02_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-2 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0060333">
      <property type="term" value="P:interferon-gamma-mediated signaling pathway"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
  </entry>

  <entry dataset="Swiss-Prot" created="1987-08-13" modified="2012-11-28" version="124">
    <accession>P04439</accession>
    <name>1A03_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-3 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0005887">
      <property type="term" value="C:integral to plasma membrane"/>
      <property type="evidence" value="NAS:UniProtKB"/>
    </dbReference>
    <dbReference type="GO" id="GO:0019048">
      <property type="term" value="P:virus-host interaction"/>
      <property type="evidence" value="IEA:UniProtKB-KW"/>
    </dbReference>
  </entry>

</uniprot>

我的 XSLT 文件目前如下所示。不过我肯定还有哪里写错了,因为它无法运行。也许是因为这些节点处于不同的层级?

<?xml version="1.0" ?>
<!--
  Stylesheet from the question: intended to render each uniprot/entry as one
  HTML table row and to merge repeated GO references into a single cell.
  It is kept exactly as posted; the review notes below explain why it fails.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <!-- Document root: writes the page skeleton and the header row, then hands
       the rest of the input to the templates below. -->
  <xsl:template match="/">
    <html>
      <body>
        <h2>My Selection</h2>
        <table border="1">
          <tr bgcolor="#9acd32">
            <th>Name</th>
            <th>GeneName</th>
            <th>AccessionNr</th>
            <th>ProteinName</th>
            <th>SubcellularLocation</th>
            <th>TissueSpecificity</th>
            <th>GOID</th>
            <th>GOName</th>
          </tr>
          <xsl:apply-templates/>
        </table>
      </body>
    </html>
  </xsl:template>

  <!-- One table row per entry.
       NOTE(review): apply-templates visits the selected nodes in document
       order, so the cells follow the order of the input (accession comes
       before name there), not the order of the header columns above. Every
       selected node also gets its own cell, so entries with several
       accessions or gene names produce rows of different widths.
       NOTE(review): 'tissue specificty' looks misspelled; confirm the exact
       type value used in the input. -->
  <xsl:template match="uniprot/entry">
    <tr>
      <xsl:apply-templates select="name|gene/name|accession|protein/recommendedName/fullName|comment[@type = 'subcellular location']/subcellularLocation/location|comment[@type = 'tissue specificty']/text|dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"/>
    </tr>
  </xsl:template>

      <!-- Cell template for every node selected by the entry template. -->
      <xsl:template match="name|gene/name|accession|protein/recommendedName/fullName|comment[@type = 'subcellular location']/subcellularLocation/location|comment[@type = 'tissue specificty']/text|dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value">
    <xsl:choose>
      <!-- Intended: first GO node of the entry; print it and append the rest.
           NOTE(review): this test is not valid XPath 1.0. The string literal
           opened after name()= ends at the next apostrophe, leaving the bare
           token GO behind it, so the stylesheet is rejected before it runs.
           Even with the quoting repaired, name() yields only the node's own
           name ('id' or 'value' for the GO attributes), never a path, and the
           preceding-sibling / following-sibling axes are empty when the
           context node is an attribute. -->
      <xsl:when test="name()='dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value' and not(preceding-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value)">
        <td>
          <xsl:value-of select="."/>
          <xsl:if test="following-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value">
            <xsl:text>;</xsl:text>
            <xsl:for-each select="following-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value">
              <xsl:value-of select="."/>
              <!-- Separator only between items, never after the last one. -->
              <xsl:if test="position()!=last()">
                <xsl:text>;</xsl:text>
              </xsl:if>
            </xsl:for-each>
          </xsl:if>
        </td>
      </xsl:when>
      <!-- Intended: later GO nodes produce no cell of their own.
           NOTE(review): same invalid quoting as the test above. -->
      <xsl:when test="name()='dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value' and preceding-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"/>
      <!-- Every other node: one cell holding its string value. -->
      <xsl:otherwise>
        <td>
          <xsl:value-of select="."/>
        </td>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>

我想要的输出:

Name GeneName AccessionNr ProteinName SubcellularLocation GOID_ GOName 
1A01_HUMAN  HLA-A  P30443  HLA class I histocompatibility antigen, A-1 alpha chain  Membrane  GO:0031901- C:early endosome membrane; GO:0012507- C:ER to Golgi transport vesicle membrane; GO:0000139- C:Golgi membrane
1A02_HUMAN  HLA-A  P01892  HLA class I histocompatibility antigen, A-2 alpha chain  Membrane  GO:0060333-P:interferon-gamma-mediated signaling pathway 
1A03_HUMAN  HLA-A  P04439  HLA class I histocompatibility antigen, A-3 alpha chain  Membrane  GO:0005887- C:integral to plasma membrane; GO:0019048- P:virus-host interaction

如果这太难了,也可能是这样的:

Name GeneName AccessionNr ProteinName SubcellularLocation GOID  GOName 
1A01_HUMAN  HLA-A  P30443  HLA class I histocompatibility antigen, A-1 alpha chain  Membrane  GO:0031901; GO:0012507; GO:0000139  C:early endosome membrane; C:ER to Golgi transport vesicle membrane; C:Golgi membrane
1A02_HUMAN  HLA-A  P01892  HLA class I histocompatibility antigen, A-2 alpha chain  Membrane  GO:0060333  P:interferon-gamma-mediated signaling pathway 
1A03_HUMAN  HLA-A  P04439  HLA class I histocompatibility antigen, A-3 alpha chain  Membrane  GO:0005887; GO:0019048  C:integral to plasma membrane; P:virus-host interaction

我知道内容很多,也不容易一下子理清。我能读懂代码,但修复错误或编写新内容对我来说仍然非常困难!(我是 XML 新手)谢谢!

1 个答案:

答案 0 :(得分:1)

我已从 XML 输入中删除了 organism 元素,并编写了新的 XSLT 以获得所需的输出:

XSLT:

<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renders every uniprot/entry as one row of an HTML table.

  Columns: entry name, first gene name, first accession, recommended protein
  name, subcellular location, tissue specificity, and all GO cross-references
  of the entry joined into a single cell as "id- term" items separated by "; ".

  The paths are written without a namespace prefix, which fits the sample
  input. NOTE(review): if the real input declares a default namespace on the
  root element, these paths select nothing until a prefix is bound for it;
  confirm against the actual files.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:template match="uniprot">
    <html>
      <body>
        <h2>My Selection</h2>
        <table border="1">
          <!-- Seven header cells, matching the seven cells written per row. -->
          <tr bgcolor="#9acd32">
            <th>Name</th>
            <th>GeneName</th>
            <th>AccessionNr</th>
            <th>ProteinName</th>
            <th>SubcellularLocation</th>
            <th>TissueSpecificity</th>
            <th>GOID - GOName</th>
          </tr>
          <xsl:for-each select="entry">
            <tr>
              <td><xsl:value-of select="name"/></td>
              <!-- An entry can list several gene names and accessions; only
                   the first of each, in document order, is shown. -->
              <td><xsl:value-of select="(gene/name)[1]"/></td>
              <td><xsl:value-of select="accession[1]"/></td>
              <td><xsl:value-of select="protein/recommendedName/fullName"/></td>
              <td><xsl:value-of select="comment[@type = 'subcellular location']/subcellularLocation/location"/></td>
              <!-- Stays empty for entries without such a comment, as in the
                   sample input. -->
              <td><xsl:value-of select="comment[@type = 'tissue specificity']/text"/></td>
              <td>
                <xsl:for-each select="dbReference[@type = 'GO']">
                  <xsl:value-of select="concat(@id, '- ', property[@type = 'term']/@value)"/>
                  <!-- Separator goes between items only, so the list does not
                       end with a dangling "; ". -->
                  <xsl:if test="position() != last()">
                    <xsl:text>; </xsl:text>
                  </xsl:if>
                </xsl:for-each>
              </td>
            </tr>
          </xsl:for-each>
        </table>
      </body>
    </html>
  </xsl:template>
</xsl:stylesheet>

输出:

<html>
   <body>
      <h2>My Selection</h2>
      <table border="1">
         <tr bgcolor="#9acd32">
            <th>Name</th>
            <th>GeneName</th>
            <th>AccessionNr</th>
            <th>ProteinName</th>
            <th>SubcellularLocation</th>
            <th>TissueSpecificity</th>
            <th>GOID - GOName</th>
         </tr>
         <tr>
            <td>1A01_HUMAN</td>
            <td>HLA-A</td>
            <td>P30443</td>
            <td>HLA class I histocompatibility antigen, A-1 alpha chain</td>
            <td>Membrane</td>
            <td></td>
            <td>GO:0031901- C:early endosome membrane; GO:0012507- C:ER to Golgi transport vesicle membrane; GO:0000139- C:Golgi membrane</td>
         </tr>
         <tr>
            <td>1A02_HUMAN</td>
            <td>HLA-A</td>
            <td>P01892</td>
            <td>HLA class I histocompatibility antigen, A-2 alpha chain</td>
            <td>Membrane</td>
            <td></td>
            <td>GO:0060333- P:interferon-gamma-mediated signaling pathway</td>
         </tr>
         <tr>
            <td>1A03_HUMAN</td>
            <td>HLA-A</td>
            <td>P04439</td>
            <td>HLA class I histocompatibility antigen, A-3 alpha chain</td>
            <td>Membrane</td>
            <td></td>
            <td>GO:0005887- C:integral to plasma membrane; GO:0019048- P:virus-host interaction</td>
         </tr>
      </table>
   </body>
</html>