我正在尝试从 UniProt XML 文件中提取一些数据,大部分想要的内容都能取到,但当同一个条目下有多个同类节点(例如多个 dbReference)时,输出就会出问题。最好能把这些值合并在一起输出。
XML代码:
<?xml version='1.0' encoding='UTF-8'?>
<?xml-stylesheet href="test_will7.xslt" type="text/xsl" ?>
<!-- Sample input: three entry elements carrying the fields the stylesheet selects
     (name, gene/name, accession, protein full name, subcellular location and
     GO dbReference id / term). -->
<uniprot>
  <entry dataset="Swiss-Prot" created="1993-04-01" modified="2012-11-28" version="118">
    <accession>P30443</accession>
    <accession>O77964</accession>
    <name>1A01_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-1 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0031901">
      <property type="term" value="C:early endosome membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
    <dbReference type="GO" id="GO:0012507">
      <property type="term" value="C:ER to Golgi transport vesicle membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
    <dbReference type="GO" id="GO:0000139">
      <property type="term" value="C:Golgi membrane"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
  </entry>
  <entry dataset="Swiss-Prot" created="1986-07-21" modified="2012-11-28" version="151">
    <accession>P01892</accession>
    <accession>O19619</accession>
    <accession>P06338</accession>
    <name>1A02_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-2 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0060333">
      <property type="term" value="P:interferon-gamma-mediated signaling pathway"/>
      <property type="evidence" value="TAS:Reactome"/>
    </dbReference>
  </entry>
  <entry dataset="Swiss-Prot" created="1987-08-13" modified="2012-11-28" version="124">
    <accession>P04439</accession>
    <name>1A03_HUMAN</name>
    <protein>
      <recommendedName>
        <fullName>HLA class I histocompatibility antigen, A-3 alpha chain</fullName>
      </recommendedName>
    </protein>
    <gene>
      <name type="primary">HLA-A</name>
      <name type="synonym">HLAA</name>
    </gene>
    <comment type="subcellular location">
      <subcellularLocation>
        <location>Membrane</location>
        <topology>Single-pass type I membrane protein</topology>
      </subcellularLocation>
    </comment>
    <dbReference type="GO" id="GO:0005887">
      <property type="term" value="C:integral to plasma membrane"/>
      <property type="evidence" value="NAS:UniProtKB"/>
    </dbReference>
    <dbReference type="GO" id="GO:0019048">
      <property type="term" value="P:virus-host interaction"/>
      <property type="evidence" value="IEA:UniProtKB-KW"/>
    </dbReference>
  </entry>
</uniprot>
我的 XSLT 文件目前如下。但我肯定还有地方写错了,因为它不起作用。也许是因为这些节点位于不同的层级?
<?xml version="1.0" ?>
<!-- Asker's stylesheet: intended to print one HTML table row per uniprot/entry,
     with all GO ids / GO terms of an entry joined by ';' in a single cell. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Root template: emits the page skeleton and the header row, then applies
     templates to the children of the document node. -->
<xsl:template match="/">
<html>
<body>
<h2>My Selection</h2>
<table border="1">
<tr bgcolor="#9acd32">
<th>Name</th>
<th>GeneName</th>
<th>AccessionNr</th>
<th>ProteinName</th>
<th>SubcellularLocation</th>
<th>TissueSpecificity</th>
<th>GOID</th>
<th>GOName</th>
</tr>
<xsl:apply-templates/>
</table>
</body>
</html>
</xsl:template>
<!-- One table row per entry. The select is a single union, so the selected
     nodes are processed in document order and each one is handed to the
     template below. -->
<xsl:template match="uniprot/entry">
<tr>
<xsl:apply-templates select="name|gene/name|accession|protein/recommendedName/fullName|comment[@type = 'subcellular location']/subcellularLocation/location|comment[@type = 'tissue specificty']/text|dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"/>
</tr>
</xsl:template>
<!-- Cell template for every selected node.
     NOTE(review): the two xsl:when tests compare name() against a string that is
     itself a path expression and contains nested single quotes, so the test
     attribute is not a valid XPath 1.0 expression. name() yields only the node
     name (for example 'id' or 'value'), never a path.
     NOTE(review): for the GO matches the context node is an attribute (@id or
     @value); attribute nodes have no siblings, so the preceding-sibling and
     following-sibling steps used below select nothing from there. -->
<xsl:template match="name|gene/name|accession|protein/recommendedName/fullName|comment[@type = 'subcellular location']/subcellularLocation/location|comment[@type = 'tissue specificty']/text|dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value">
<xsl:choose>
<!-- Intended case: first GO value of the entry; opens the cell and appends the
     remaining values separated by ';'. -->
<xsl:when test="name()='dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value' and not(preceding-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value)">
<td>
<xsl:value-of select="."/>
<xsl:if test="following-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value">
<xsl:text>;</xsl:text>
<xsl:for-each select="following-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value">
<xsl:value-of select="."/>
<xsl:if test="position()!=last()">
<xsl:text>;</xsl:text>
</xsl:if>
</xsl:for-each>
</xsl:if>
</td>
</xsl:when>
<!-- Intended case: a later GO value; emits nothing because the first one
     already printed it. -->
<xsl:when test="name()='dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value' and preceding-sibling::dbReference[@type = 'GO']/@id|dbReference[@type = 'GO']/property[@type = 'term']/@value"/>
<!-- Every other selected node gets its own cell. -->
<xsl:otherwise>
<td>
<xsl:value-of select="."/>
</td>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
我想要的输出:
Name GeneName AccessionNr ProteinName SubcellularLocation GOID_ GOName
1A01_HUMAN HLA-A P30443 HLA class I histocompatibility antigen, A-1 alpha chain Membrane GO:0031901- C:early endosome membrane; GO:0012507- C:ER to Golgi transport vesicle membrane; GO:0000139- C:Golgi membrane
1A02_HUMAN HLA-A P01892 HLA class I histocompatibility antigen, A-2 alpha chain Membrane GO:0060333-P:interferon-gamma-mediated signaling pathway
1A03_HUMAN HLA-A P04439 HLA class I histocompatibility antigen, A-3 alpha chain Membrane GO:0005887- C:integral to plasma membrane; GO:0019048- P:virus-host interaction
如果这太难了,也可能是这样的:
Name GeneName AccessionNr ProteinName SubcellularLocation GOID GOName
1A01_HUMAN HLA-A P30443 HLA class I histocompatibility antigen, A-1 alpha chain Membrane GO:0031901; GO:0012507; GO:0000139 C:early endosome membrane; C:ER to Golgi transport vesicle membrane; C:Golgi membrane
1A02_HUMAN HLA-A P01892 HLA class I histocompatibility antigen, A-2 alpha chain Membrane GO:0060333 P:interferon-gamma-mediated signaling pathway
1A03_HUMAN HLA-A P04439 HLA class I histocompatibility antigen, A-3 alpha chain Membrane GO:0005887; GO:0019048 C:integral to plasma membrane; P:virus-host interaction
我知道这很多,很难区分一切。我可以阅读代码,但修复错误或编写新内容仍然非常困难! (我是XML的新手) 谢谢!
答案 0 :(得分:1)
我已从 XML 输入中删除了 organism 部分,并编写了新的 XSLT 以获得所需的输出:
**XSLT:**
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Renders each uniprot/entry as one row of an HTML table.

  Single-valued cells use xsl:value-of, which in XSLT 1.0 outputs only the
  first selected node in document order; for the sample data that is the
  first gene name and the first accession of each entry.

  All GO cross-references of an entry are joined into one cell as
  "<id>- <term>; ".
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:template match="uniprot">
    <html>
      <body>
        <h2>My Selection</h2>
        <table border="1">
          <!-- NOTE(review): eight header cells are declared here, but every data
               row below emits seven cells because GO id and GO term share one
               cell. -->
          <tr bgcolor="#9acd32">
            <th>Name</th>
            <th>GeneName</th>
            <th>AccessionNr</th>
            <th>ProteinName</th>
            <th>SubcellularLocation</th>
            <th>TissueSpecificity</th>
            <th>GOID</th>
            <th>GOName</th>
          </tr>
          <xsl:for-each select="entry">
            <tr>
              <td><xsl:value-of select="name"/></td>
              <td><xsl:value-of select="gene/name"/></td>
              <td><xsl:value-of select="accession"/></td>
              <td><xsl:value-of select="protein/recommendedName/fullName"/></td>
              <td><xsl:value-of select="comment[@type = 'subcellular location']/subcellularLocation/location"/></td>
              <!-- Comment type spelled 'tissue specificity'; the former spelling
                   'tissue specificty' could never match such a comment. The
                   cell stays empty when an entry has no such comment. -->
              <td><xsl:value-of select="comment[@type = 'tissue specificity']"/></td>
              <td>
                <!-- NOTE(review): the separator is appended after every item,
                     so the cell text ends with a trailing "; ". -->
                <xsl:for-each select="dbReference[@type = 'GO']">
                  <xsl:value-of select="concat(@id,'- ',property[@type = 'term']/@value,'; ')"/>
                </xsl:for-each>
              </td>
            </tr>
          </xsl:for-each>
        </table>
      </body>
    </html>
  </xsl:template>
</xsl:stylesheet>
输出:
<!-- Transformation result for the three sample entries. The header row has
     eight th cells; each data row has seven td cells: five values, an empty
     TissueSpecificity cell, and one cell holding all "GO id- term; " pairs. -->
<html>
<body>
<h2>My Selection</h2>
<table border="1">
<tr bgcolor="#9acd32">
<th>Name</th>
<th>GeneName</th>
<th>AccessionNr</th>
<th>ProteinName</th>
<th>SubcellularLocation</th>
<th>TissueSpecificity</th>
<th>GOID</th>
<th>GOName</th>
</tr>
<tr>
<td>1A01_HUMAN</td>
<td>HLA-A</td>
<td>P30443</td>
<td>HLA class I histocompatibility antigen, A-1 alpha chain</td>
<td>Membrane</td>
<td></td>
<td>GO:0031901- C:early endosome membrane; GO:0012507- C:ER to Golgi transport vesicle membrane; GO:0000139- C:Golgi membrane; </td>
</tr>
<tr>
<td>1A02_HUMAN</td>
<td>HLA-A</td>
<td>P01892</td>
<td>HLA class I histocompatibility antigen, A-2 alpha chain</td>
<td>Membrane</td>
<td></td>
<td>GO:0060333- P:interferon-gamma-mediated signaling pathway; </td>
</tr>
<tr>
<td>1A03_HUMAN</td>
<td>HLA-A</td>
<td>P04439</td>
<td>HLA class I histocompatibility antigen, A-3 alpha chain</td>
<td>Membrane</td>
<td></td>
<td>GO:0005887- C:integral to plasma membrane; GO:0019048- P:virus-host interaction; </td>
</tr>
</table>
</body>
</html>