我找到了几个能部分解决我问题的提问(请参阅 here 和 here),但我无法把它们整合起来。我有一组 XML 记录,想把它们转换为制表符分隔的格式。但是,并非所有 XML 记录都包含所有字段,有些记录还包含同一字段的多个实例。
两个示例XML记录:
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Sample MARCXML collection holding two records.
  Record 12789 repeats fields 650, 653 and 856 and has no 546 field;
  record 15497 has a 546 field but no 653 or 700 fields.
  The ampersands inside the 856 URLs are written as &amp; so that the
  document is well-formed.
-->
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
                 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                 xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
  <marc:record>
    <marc:leader>02179 am a 002893u </marc:leader>
    <marc:controlfield tag="001">12789</marc:controlfield>
    <marc:controlfield tag="005">20120521</marc:controlfield>
    <marc:controlfield tag="007">cuuuu---auuuu</marc:controlfield>
    <marc:controlfield tag="008">120521s|||| xx o 0 u ||| |</marc:controlfield>
    <marc:datafield tag="020" ind1=" " ind2=" ">
      <marc:subfield code="a">9789089640574</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="100" ind1="1" ind2=" ">
      <marc:subfield code="a">Rooij van ,Robert</marc:subfield>
      <marc:subfield code="4">aut</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="245" ind1="1" ind2=" ">
      <marc:subfield code="a">New Perspectives on Games and Interaction</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="260" ind1=" " ind2=" ">
      <marc:subfield code="b">Amsterdam University Press</marc:subfield>
      <marc:subfield code="c">2008</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="300" ind1=" " ind2=" ">
      <marc:subfield code="a">1 electronic resource (330 p.)</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="520" ind1=" " ind2=" ">
      <marc:subfield code="a">This volume is a collection of papers ...</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="0">
      <marc:subfield code="a">Mathematics</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="0">
      <marc:subfield code="a">Philosophy (General)</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="0">
      <marc:subfield code="a">Economic theory. Demography</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="653" ind1=" " ind2=" ">
      <marc:subfield code="a">Economics</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="653" ind1=" " ind2=" ">
      <marc:subfield code="a">Philosophy</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="653" ind1=" " ind2=" ">
      <marc:subfield code="a">Mathematics</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="653" ind1=" " ind2=" ">
      <marc:subfield code="a">Economie</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="653" ind1=" " ind2=" ">
      <marc:subfield code="a">Filosofie</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="653" ind1=" " ind2=" ">
      <marc:subfield code="a">Wiskunde</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="700" ind1="1" ind2=" ">
      <marc:subfield code="a">Apt ,Krzysztof</marc:subfield>
      <marc:subfield code="4">aut</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="856" ind1="4" ind2="0">
      <marc:subfield code="u">http://www.doabooks.org/doab?func=fulltext&amp;rid=12789</marc:subfield>
      <marc:subfield code="z">Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial (CC by-nc)</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="856" ind1="4" ind2="0">
      <marc:subfield code="u">http://www.oapen.org/download?type=document&amp;docid=340074</marc:subfield>
    </marc:datafield>
  </marc:record>
  <marc:record>
    <marc:leader>01452 am a 001933u </marc:leader>
    <marc:controlfield tag="001">15497</marc:controlfield>
    <marc:controlfield tag="005">20140217</marc:controlfield>
    <marc:controlfield tag="007">cuuuu---auuuu</marc:controlfield>
    <marc:controlfield tag="008">140217s|||| xx o 0 u ||| |</marc:controlfield>
    <marc:datafield tag="020" ind1=" " ind2=" ">
      <marc:subfield code="a">9788867050673</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="100" ind1="1" ind2=" ">
      <marc:subfield code="a">Emanuele Haus</marc:subfield>
      <marc:subfield code="4">aut</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="245" ind1="1" ind2=" ">
      <marc:subfield code="a">Dynamics of an elastic satellite with internal friction.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="260" ind1=" " ind2=" ">
      <marc:subfield code="b">Ledizioni - LediPublishing</marc:subfield>
      <marc:subfield code="c">2013</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="300" ind1=" " ind2=" ">
      <marc:subfield code="a">1 electronic resource ( p.)</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="520" ind1=" " ind2=" ">
      <marc:subfield code="a">n this thesis, we study the dynamics...</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="546" ind1=" " ind2=" ">
      <marc:subfield code="a">english</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="0">
      <marc:subfield code="a">Mathematics</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="856" ind1="4" ind2="0">
      <marc:subfield code="u">http://www.doabooks.org/doab?func=fulltext&amp;rid=15497</marc:subfield>
      <marc:subfield code="z">Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial Share Alike (CC by-nc-sa)</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="856" ind1="4" ind2="0">
      <marc:subfield code="u">http://www.ledizioni.it/stag/wp-content/uploads/2014/02/tesi_haus.pdf</marc:subfield>
    </marc:datafield>
  </marc:record>
</marc:collection>
我一直在尝试改写这个 previous answer 中的 XSLT,但到目前为止进展不大:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  XSLT 2.0 attempt at turning a MARCXML collection into delimited text.
  A header line is built from the first occurrence of every tag+code
  combination, followed by one output line per element matched by */*.
  Only lexical problems are repaired here (escaped "less than" in the tests,
  character references for tab and newline, a single xsl:output); the
  matching logic is left as originally written.
-->
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xpath-default-namespace="http://www.loc.gov/MARC21/slim">

  <xsl:output method="text"/>
  <xsl:strip-space elements="*"/>

  <!-- Column delimiter (tab). A character reference is used because a literal
       tab inside an attribute value is normalized to a space by the XML parser. -->
  <xsl:variable name="delimiter" select="'&#9;'"/>

  <!-- Index every subfield by its parent's tag concatenated with its own code. -->
  <xsl:key name="field"
           match="/collection/record/datafield/subfield"
           use="concat(../@tag,@code)"/>

  <!-- variable containing the first occurrence of each field -->
  <xsl:variable name="allFields"
                select="/collection/record/datafield/subfield
                        [generate-id()
                         = generate-id(key('field',
                                           concat(../@tag,@code))[1])]"/>

  <!-- Writes the header line, then hands every grandchild of the document
       node (the children of the root element) to the generic template. -->
  <xsl:template match="/">
    <xsl:for-each select="$allFields">
      <!-- Sort numerically on the first three characters of tag+code, i.e. the tag. -->
      <xsl:sort select="substring(concat(../@tag,@code),1,3)"
                data-type="number"/>
      <xsl:value-of select="concat(../@tag,@code)"/>
      <xsl:if test="position() &lt; last()">
        <xsl:value-of select="$delimiter"/>
      </xsl:if>
    </xsl:for-each>
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates select="*/*"/>
  </xsl:template>

  <!-- Writes one line for the matched element: for each known field, the
       children of the matched element whose @code equals that field's @code.
       NOTE(review): the lookup compares @code only (not the tag) and inspects
       direct children of the matched element only. -->
  <xsl:template match="*">
    <xsl:variable name="this" select="."/>
    <xsl:for-each select="$allFields">
      <xsl:sort select="substring(concat(../@tag,@code),1,3)"
                data-type="number"/>
      <xsl:value-of select="$this/*[@code = current()/@code]"/>
      <xsl:if test="position() &lt; last()">
        <xsl:value-of select="$delimiter"/>
      </xsl:if>
    </xsl:for-each>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

</xsl:stylesheet>
在我想要实现的输出中,标题行将包含 leader,后跟各个唯一的 @tag 值(与其子字段的 subfield/@code 连接在一起),并按 tag 升序排列:
leader 001 005 007 008 020a 100a 1004 245a 260b 260c 300a 520a 546a 650a 653a 700a 7004 856u 856z
如果一条记录中某个 field/subfield 组合有多个值,我想用竖线把它们连接在一起,例如:
653a
Economics|Philosophy|Mathematics
但是,如果记录缺少特定字段,我只想输出制表符,以保持一切对齐。
完整样本TSV输出:
leader 001 005 007 008 020a 100a 1004 245a 260b 260c 300a 520a 546a 650a 653a 700a 7004 856u 856z
02179 am a 002893u 12789 20120521 cuuuu---auuuu 120521s|||| xx o 0 u ||| | 9789089640574 Rooij van ,Robert aut New Perspectives on Games and Interaction Amsterdam University Press 2008 1 electronic resource (330 p.) This volume is a collection of papers Mathematics|Philosophy (General)|Economic theory. Demography Economics|Philosophy|Mathematics|Economie|Filosofie|Wiskunde Apt ,Krzysztof aut http://www.doabooks.org/doab?func=fulltext&rid=12789|http://www.oapen.org/download?type=document&docid=340074 Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial (CC by-nc)
01452 am a 001933u 15497 20140217 cuuuu---auuuu 140217s|||| xx o 0 u ||| | 9788867050673 Emanuele Haus aut Dynamics of an elastic satellite with internal friction. Ledizioni - LediPublishing 2013 1 electronic resource ( p.) In this thesis, we study the dynamics of an elastic body english Mathematics http://www.doabooks.org/doab?func=fulltext&rid=15497|http://www.ledizioni.it/stag/wp-content/uploads/2014/02/tesi_haus.pdf Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial Share Alike (CC by-nc-sa)
答案 0 :(得分:2)
你说“如果某条记录缺少特定字段”——由此我推断,你一定有一份想要导出的字段列表。(是 MARC 的全部字段吗?理论上可能出现的从 000 到 999 的每一个字段?这只有你自己能回答,而你还没有说明。)如果你没有要导出的字段列表,那么你的问题陈述就是自相矛盾的,你需要先把问题想得更清楚。
我们假设您要导出变量 $fields 中列出的字段。
<!-- Tags of the MARC fields to export, as a sequence of strings.
     The line breaks inside the literal are plain whitespace and are consumed
     by the '\s+' tokenizer pattern. Each tag is listed once so that no column
     is produced twice.
     NOTE(review): the xs prefix must be bound to the XML Schema namespace in
     the enclosing stylesheet, which is not shown in this fragment. -->
<xsl:variable name="fields" as="xs:string*"
              select="tokenize('001 005 007 008 020
                                100 245 260 300
                                520 546 650 653 700
                                856', '\s+')"/>
您当前的问题在于,输出的形状是由输入中的字段决定的,许多 XSLT 程序员把这种样式表称为“推(push)”式样式表。而您希望输出的形状由 $fields 中的字段列表决定,而不是由输入决定——也就是那些 XSLT 程序员所说的“拉(pull)”式样式表。在为非 XML 系统(如电子表格)准备数据时,拉式样式表很常见,因为这类系统不能很好地应对结构上的变化;拉式样式表在习惯过程式编程的程序员中也很常见,他们不知道还有别的思考问题的方式。后一点使一些 XSLT 程序员有点看不起拉式样式表,但如果您对问题的描述是正确的,那么拉式样式表正是您所需要的。
从以上所述您应该可以看出,问题在于匹配 / 的模板是通过 <xsl:apply-templates select="*/*" /> 处理输入来构造输出的。如果输入中没有 546 字段,那么不费一番不必要的周折,就没有机会为它插入制表符。
您需要把当前对子元素进行迭代的 apply-templates 替换为一种遍历 $fields 中各字段编号的构造:对每个字段编号输出一个制表符以及其他相应的信息,而这些信息取决于输入中是否存在该编号的字段。在 XSLT 3.0 中,可以对一个值序列应用模板,因此可以写 <xsl:apply-templates select="$fields"/>,但在 2.0 中这行不通。2.0 中可用的做法包括:
把 $fields 表示为元素序列而不是字符串序列;然后调用 <xsl:apply-templates select="$fields"/> 来迭代所需的字段编号。您需要记得把输入文档中的一个节点(根节点是个不错的选择)作为参数传入,这样在处理字段编号的模板里才能回到输入文档。
以 $fields 作为参数调用一个命名模板;在命名模板中,从列表中取出第一个字段编号并处理它,然后用列表的其余部分递归调用同一个命名模板。如果取不到第一个字段编号,说明字段编号序列已为空,处理即告完成。
编写一个递归函数,其工作方式与刚才描述的命名模板相同。
编写一个函数,用来处理一条 MARC 记录中的一个字段编号,并在 XPath 的 for 表达式中调用它:
<!-- Sketch of the per-record template; the "..." lines are placeholders
     for code that is not shown. -->
<xsl:template match="marc:record">
...
<!-- Pull style: iterate over the wanted field numbers in $fields and call
     my:one-field-one-record once per field number, passing the current
     record as the second argument.
     NOTE(review): the function and the "my" prefix are not defined in this
     fragment; they have to be supplied by the enclosing stylesheet. -->
<xsl:sequence select="for $fn in $fields
return my:one-field-one-record($fn, .)
"/>
...
</xsl:template>
答案 1 :(得分:2)
我建议你这样试试:
XSLT 2.0
<!--
  XSLT 2.0: converts a MARCXML collection to tab-separated text.
  Line 1 is a header with one column per distinct tag+code combination found
  in any datafield of any record. Each following line holds one record;
  repeated values of the same tag+code are joined with "|", and a missing
  combination yields an empty column.
  The leader and the controlfields are not exported.
-->
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:marc="http://www.loc.gov/MARC21/slim"
    exclude-result-prefixes="marc">

  <xsl:output method="text" encoding="UTF-8"/>

  <!-- Temporary tree with one <field tag=".." code=".."/> per output column.
       Subfields are collected from every datafield sharing a tag (not only
       the first one) and grouped by code, so a code that first appears in a
       later datafield or record still gets exactly one column. -->
  <xsl:variable name="fields">
    <xsl:for-each-group select="/marc:collection/marc:record/marc:datafield" group-by="@tag">
      <xsl:sort select="@tag"/>
      <xsl:variable name="tag" select="current-grouping-key()"/>
      <xsl:for-each-group select="current-group()/marc:subfield" group-by="@code">
        <!-- NOTE(review): the sort key is the text of the first subfield of
             each code group, not the code itself, so the column order inside
             a tag depends on the data. Kept to preserve the existing order. -->
        <xsl:sort/>
        <field tag="{$tag}" code="{current-grouping-key()}"/>
      </xsl:for-each-group>
    </xsl:for-each-group>
  </xsl:variable>

  <xsl:template match="/">
    <!-- header -->
    <xsl:for-each select="$fields/field">
      <xsl:value-of select="@tag"/>
      <xsl:value-of select="@code"/>
      <xsl:if test="position() != last()">
        <xsl:text>&#9;</xsl:text>
      </xsl:if>
    </xsl:for-each>
    <!-- The header is terminated by a line feed, written as a character
         reference so it cannot be lost to whitespace handling. -->
    <xsl:text>&#10;</xsl:text>
    <!-- data -->
    <xsl:for-each select="marc:collection/marc:record">
      <xsl:variable name="current-record" select="."/>
      <xsl:for-each select="$fields/field">
        <!-- All values of this tag+code in the current record, joined by "|";
             nothing is written when the record has no such subfield. -->
        <xsl:value-of select="$current-record/marc:datafield[@tag=current()/@tag]/marc:subfield[@code=current()/@code]" separator="|"/>
        <xsl:if test="position() != last()">
          <xsl:text>&#9;</xsl:text>
        </xsl:if>
      </xsl:for-each>
      <!-- Line feed between records; none after the last one. -->
      <xsl:if test="position() != last()">
        <xsl:text>&#10;</xsl:text>
      </xsl:if>
    </xsl:for-each>
  </xsl:template>

</xsl:stylesheet>
结果,应用于您的示例输入时:
020a 100a 1004 245a 260c 260b 300a 520a 546a 650a 653a 700a 7004 856z 856u
9789089640574 Rooij van ,Robert aut New Perspectives on Games and Interaction 2008 Amsterdam University Press 1 electronic resource (330 p.) This volume is a collection of papers ... Mathematics|Philosophy (General)|Economic theory. Demography Economics|Philosophy|Mathematics|Economie|Filosofie|Wiskunde Apt ,Krzysztof aut Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial (CC by-nc) http://www.doabooks.org/doab?func=fulltext&rid=12789|http://www.oapen.org/download?type=document&docid=340074
9788867050673 Emanuele Haus aut Dynamics of an elastic satellite with internal friction. 2013 Ledizioni - LediPublishing 1 electronic resource ( p.) n this thesis, we study the dynamics... english Mathematics Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial Share Alike (CC by-nc-sa) http://www.doabooks.org/doab?func=fulltext&rid=15497|http://www.ledizioni.it/stag/wp-content/uploads/2014/02/tesi_haus.pdf
注意:我没弄明白 leader 在输入或输出中所起的作用。
答案 2 :(得分:2)
这也适用于XSLT 1.0。
以下解决方案围绕文档范围的唯一标记列表构建,并为每条记录迭代该列表。实际上,即使记录中不存在特定标记,这也允许输出分隔符。
<!--
  XSLT 1.0: converts a MARCXML collection to delimited text.
  A document-wide list of unique tags (controlfield tag, or datafield tag
  plus subfield code) is built once and iterated for every record, so a
  delimiter is written even when a record lacks a given tag.
  The header and every data row iterate that list with the same sort key,
  which keeps the columns aligned.
-->
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:marc="http://www.loc.gov/MARC21/slim"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

  <xsl:output method="text" encoding="Windows-1252" />

  <!-- Delimiters are written as character references: a literal tab or line
       break inside an attribute value is normalized to a space by the parser. -->
  <xsl:param name="hDelim" select="'&#9;'" /><!-- horizontal delimiter (between columns) -->
  <xsl:param name="vDelim" select="'&#10;'" /><!-- vertical delimiter (between rows) -->
  <xsl:param name="sDelim" select="'|'" /><!-- subfield delimiter (between repeated values) -->

  <!-- group tags by @tag + @code -->
  <xsl:key name="kAllTags" match="marc:controlfield | marc:subfield"
           use="concat(@tag | ../@tag, @code)" />

  <!-- group tags by record ID + @tag + @code -->
  <xsl:key name="kRecordTags" match="marc:controlfield | marc:subfield"
           use="concat(generate-id(ancestor::marc:record), ':', @tag | ../@tag, @code)" />

  <!-- build a list of unique tags to iterate over:
       keep a node only if it is the first one in its kAllTags group -->
  <xsl:variable name="uniqueTags" select="
    (//marc:controlfield | //marc:subfield)[
      generate-id() = generate-id(key('kAllTags', concat(@tag | ../@tag, @code))[1])
    ]
  " />

  <xsl:template match="marc:collection">
    <!-- write header line -->
    <xsl:text>leader</xsl:text>
    <xsl:value-of select="$hDelim" />
    <xsl:apply-templates select="$uniqueTags" mode="head">
      <xsl:sort select="concat(@tag | ../@tag, @code)" />
    </xsl:apply-templates>
    <xsl:value-of select="$vDelim" />
    <!-- write individual records -->
    <xsl:apply-templates select="marc:record" />
  </xsl:template>

  <xsl:template match="marc:record">
    <xsl:variable name="recordId" select="generate-id()" />
    <xsl:value-of select="marc:leader" />
    <xsl:value-of select="$hDelim" />
    <!-- for each unique tag, find the fields that have that tag on this record -->
    <xsl:for-each select="$uniqueTags">
      <!-- Same sort key as the header line; without it the data columns come
           out in document order and no longer line up with the header. -->
      <xsl:sort select="concat(@tag | ../@tag, @code)" />
      <xsl:variable name="tagKey" select="concat($recordId, ':', @tag | ../@tag, @code)" />
      <xsl:apply-templates select="key('kRecordTags', $tagKey)" mode="data" />
      <xsl:if test="position() != last()"><xsl:value-of select="$hDelim" /></xsl:if>
    </xsl:for-each>
    <!-- row separator between records; none after the last record -->
    <xsl:if test="position() != last()"><xsl:value-of select="$vDelim" /></xsl:if>
  </xsl:template>

  <!-- header cell: the tag name, followed by a column delimiter unless last -->
  <xsl:template match="marc:controlfield | marc:subfield" mode="head">
    <xsl:value-of select="concat(@tag | ../@tag, @code)" />
    <xsl:if test="position() != last()"><xsl:value-of select="$hDelim" /></xsl:if>
  </xsl:template>

  <!-- data value: whitespace-normalized text, followed by the subfield
       delimiter unless it is the last value for this tag on the record -->
  <xsl:template match="marc:controlfield | marc:subfield" mode="data">
    <xsl:value-of select="normalize-space()" />
    <xsl:if test="position() != last()"><xsl:value-of select="$sDelim" /></xsl:if>
  </xsl:template>

</xsl:stylesheet>
此模板使用您的输入数据生成:
leader 001 005 007 008 020a 1004 100a 245a 260b 260c 300a 520a 546a 650a 653a 7004 700a 856u 856z 02179 am a 002893u 12789 20120521 cuuuu---auuuu 120521s|||| xx o 0 u ||| | 9789089640574 Rooij van ,Robert aut New Perspectives on Games and Interaction Amsterdam University Press 2008 1 electronic resource (330 p.) This volume is a collection of papers ... Mathematics|Philosophy (General)|Economic theory. Demography Economics|Philosophy|Mathematics|Economie|Filosofie|Wiskunde Apt ,Krzysztof aut http://www.doabooks.org/doab?func=fulltext&rid=12789|http://www.oapen.org/download?type=document&docid=340074 Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial (CC by-nc) 01452 am a 001933u 15497 20140217 cuuuu---auuuu 140217s|||| xx o 0 u ||| | 9788867050673 Emanuele Haus aut Dynamics of an elastic satellite with internal friction. Ledizioni - LediPublishing 2013 1 electronic resource ( p.) n this thesis, we study the dynamics... Mathematics http://www.doabooks.org/doab?func=fulltext&rid=15497|http://www.ledizioni.it/stag/wp-content/uploads/2014/02/tesi_haus.pdf Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial Share Alike (CC by-nc-sa) english