这是previous post的后续问题。我正在处理一组MARC XML格式的图书馆目录记录,想用名为OpenRefine的工具对其进行清理和增强。OpenRefine不能很好地处理XML数据,所以我需要先将MARC XML转换为TSV。
我之前那篇帖子中的解决方案帮我完成了这一步。但是,从OpenRefine导出之后,我还需要把数据转换回MARC XML。OpenRefine的输出比典型的TSV更复杂:由于某些字段是可重复的,单条记录可能跨越多行:
leader 001 005 007 008 020__$a 1001_$a 1001_$4 2451_$a 260__$b 260__$c 300__$a 520__$a 546__$a 650_0$a 653__$a 7001_$a 7001_$4 85640$u 85640$z
02179 am a 002893u 12789 20120521 cuuuu---auuuu 120521s|||| xx o 0 u ||| | 9789089640574 Rooij van ,Robert aut New Perspectives on Games and Interaction Amsterdam University Press 2008 1 electronic resource (330 p.) This volume is a ... Mathematics Economics Apt ,Krzysztof aut http://www.doabooks.org/doab?func=fulltext&rid=12789 Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial (CC by-nc)
Philosophy (General) Philosophy http://www.oapen.org/download?type=document&docid=340074
Economic theory. Demography Mathematics
Economie
Filosofie
Wiskunde
01914 am a 002413u 13087 20120521 cuuuu---auuuu 120521s|||| xx o 0 u ||| | 9783938616352 Roquette, Peter aut Helmut Hasse und Emmy Noether ; die Korrespondenz 1925 - 1935. Universitätsverlag Göttingen 2006 1 electronic resource ( p.) This book reproduces ... German Science (General) mathematics Lemmermeyer, Franz aut http://www.doabooks.org/doab?func=fulltext&rid=13087 Description of rights in Directory of Open Access Books (DOAB): Attribution No Derivatives (CC by-nd)
Mathematics correspondence http://www.oapen.org/download?type=document&docid=353934
02345 am a 002773u 13241 20120521 cuuuu---auuuu 120521s|||| xx o 0 u ||| | 9783940344502 Roquette, Peter aut Emil Artin und Helmut Hasse Universitätsverlag Göttingen 2008 1 electronic resource ( p.) This book contains ... German Mathematics history of mathematics Lemmermeyer, Franz aut http://www.doabooks.org/doab?func=fulltext&rid=13241 Description of rights in Directory of Open Access Books (DOAB): Attribution No Derivatives (CC by-nd)
Geschichte der Mathematik Frei, Günther aut http://www.oapen.org/download?type=document&docid=359593
OAPEN Noether, Emmy aut
Hasse, Helmut aut
我正在尝试修改从TSV转换为XML的XSLT 2.0样式表(基于提议的解决方案here):
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <!--
    Rebuilds a MARCXML collection from a tab-separated export in which a
    single record may span several rows. The TSV is read from $filePath with
    unparsed-text(); the XML input document only triggers the transformation.
  -->
  <xsl:output method="xml" encoding="UTF-8" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- File path parameter. -->
  <xsl:param name="filePath">stack_test-tsv-2.tsv</xsl:param>

  <!-- Main template that parses the TSV and creates structured XML.
       It fires only when the input document contains a <dummy> element. -->
  <xsl:template match="dummy">
    <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
                     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                     xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">

      <!-- Read in TSV file. -->
      <xsl:variable name="text" select="unparsed-text($filePath,'UTF-8')"/>

      <!-- Header row: the first match of the line regex, with tabs swapped
           for '|' so that it can be tokenized into column names below. -->
      <xsl:variable name="header">
        <xsl:analyze-string select="$text" regex="(..*)">
          <xsl:matching-substring>
            <xsl:if test="position()=1">
              <xsl:value-of select="replace(regex-group(1),'\t','|')"/>
            </xsl:if>
          </xsl:matching-substring>
        </xsl:analyze-string>
      </xsl:variable>
      <xsl:variable name="headerTokens" select="tokenize($header,'\|')"/>

      <!-- Flat list of <field name="column header">value</field> elements,
           one per non-empty cell of every row after the header. -->
      <xsl:variable name="recordBody">
        <xsl:analyze-string select="$text" regex="(..*)">
          <xsl:matching-substring>
            <xsl:if test="not(position()=1)">
              <!-- Begin creating the records.
                   Assign column headers to field elements as @name attributes.
                   Every character of the row is consumed by a match (a cell
                   plus its optional trailing tab, or a lone tab for an empty
                   cell), so position() is the column index. -->
              <xsl:analyze-string select="." regex="([^\t][^\t]*)\t?|\t">
                <xsl:matching-substring>
                  <xsl:variable name="pos" select="position()"/>
                  <xsl:variable name="headerToken" select="$headerTokens[$pos]"/>
                  <!-- Empty cells produce no field element. -->
                  <xsl:if test="regex-group(1)[position() = 1]">
                    <field name="{$headerToken}">
                      <xsl:value-of select="regex-group(1)"/>
                    </field>
                  </xsl:if>
                </xsl:matching-substring>
              </xsl:analyze-string>
            </xsl:if>
          </xsl:matching-substring>
        </xsl:analyze-string>
      </xsl:variable>

      <!-- Split into record chunks: a non-empty leader field starts a new
           record and every field up to the next leader belongs to it.
           Grouping is positional, so it does not depend on leader values
           being unique and needs only one pass over the fields. -->
      <xsl:variable name="recompile">
        <xsl:for-each-group select="$recordBody/field"
                            group-starting-with="field[@name='leader'][. != '']">
          <record>
            <xsl:sequence select="current-group()"/>
          </record>
        </xsl:for-each-group>
      </xsl:variable>

      <!-- Rebuild MARC record. -->
      <xsl:for-each select="$recompile/record">
        <marc:record>
          <marc:leader>
            <xsl:value-of select="field[@name='leader']"/>
          </marc:leader>

          <!-- Control fields: every column whose header is a bare tag in the
               range 001 to 009, emitted in tag order. -->
          <xsl:for-each select="field[matches(@name, '^00[1-9]$')]">
            <xsl:sort select="@name"/>
            <marc:controlfield tag="{@name}">
              <xsl:value-of select="."/>
            </marc:controlfield>
          </xsl:for-each>

          <!-- Data fields: headers look like TTTIJ$c (tag, two indicators
               with '_' standing for blank, '$', subfield code). Tags from
               010 upwards are data fields. -->
          <xsl:for-each-group select="field[number(substring(@name, 1, 3)) >= 10]"
                              group-adjacent="substring(@name, 1, 3)">
            <xsl:sort select="current-grouping-key()"/>
            <xsl:choose>
              <!-- 6xx: one datafield per value. -->
              <xsl:when test="starts-with(current-grouping-key(),'6')">
                <xsl:for-each select="current-group()">
                  <!-- The indicator expressions start right after the opening
                       quote: any whitespace before '{' would become part of
                       the attribute value. -->
                  <marc:datafield tag="{current-grouping-key()}"
                                  ind1="{if (substring(@name, 4, 1) = '_') then ' ' else substring(@name, 4, 1)}"
                                  ind2="{if (substring(@name, 5, 1) = '_') then ' ' else substring(@name, 5, 1)}">
                    <marc:subfield code="{substring(@name, 7, 1)}">
                      <xsl:value-of select="."/>
                    </marc:subfield>
                  </marc:datafield>
                </xsl:for-each>
              </xsl:when>
              <!-- Other tags: all adjacent cells with the same tag become
                   subfields of a single datafield.
                   NOTE(review): two occurrences of the same tag that end up
                   adjacent (e.g. 700 on consecutive rows) are merged into one
                   datafield by this branch. -->
              <xsl:otherwise>
                <marc:datafield tag="{current-grouping-key()}"
                                ind1="{if (substring(@name, 4, 1) = '_') then ' ' else substring(@name, 4, 1)}"
                                ind2="{if (substring(@name, 5, 1) = '_') then ' ' else substring(@name, 5, 1)}">
                  <xsl:for-each select="current-group()">
                    <marc:subfield code="{substring(@name, 7, 1)}">
                      <xsl:value-of select="."/>
                    </marc:subfield>
                  </xsl:for-each>
                </marc:datafield>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:for-each-group>
        </marc:record>
      </xsl:for-each>
    </marc:collection>
  </xsl:template>
</xsl:stylesheet>
这基本上给了我所需要的东西,但它并不灵活,使用这种方法考虑源数据中所有可能的变化将是一项挑战。
相反,我想修改xsl:analyze-string的正则表达式,以便预先处理这种更复杂的制表符分隔结构。基本上,每当“leader”列出现一个值时,就应该开始一条新记录。后续行中出现的各个值应解析为单独的XML元素,如下所示:
<?xml version="1.0" encoding="UTF-8" ?>
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
<marc:record>
<marc:leader>02179 am a 002893u </marc:leader>
<marc:controlfield tag="001">12789</marc:controlfield>
<marc:controlfield tag="005">20120521</marc:controlfield>
<marc:controlfield tag="007">cuuuu---auuuu</marc:controlfield>
<marc:controlfield tag="008">120521s|||| xx o 0 u ||| |</marc:controlfield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="a">9789089640574</marc:subfield>
</marc:datafield>
<marc:datafield tag="100" ind1="1" ind2=" ">
<marc:subfield code="a">Rooij van ,Robert</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="245" ind1="1" ind2=" ">
<marc:subfield code="a">New Perspectives on Games and Interaction</marc:subfield>
</marc:datafield>
<marc:datafield tag="260" ind1=" " ind2=" ">
<marc:subfield code="b">Amsterdam University Press</marc:subfield>
<marc:subfield code="c">2008</marc:subfield>
</marc:datafield>
<marc:datafield tag="300" ind1=" " ind2=" ">
<marc:subfield code="a">1 electronic resource (330 p.)</marc:subfield>
</marc:datafield>
<marc:datafield tag="520" ind1=" " ind2=" ">
<marc:subfield code="a">This volume is a collection of papers ...</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="0">
<marc:subfield code="a">Mathematics</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="0">
<marc:subfield code="a">Philosophy (General)</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="0">
<marc:subfield code="a">Economic theory. Demography</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">Economics</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">Philosophy</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">Mathematics</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">Economie</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">Filosofie</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">Wiskunde</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Apt ,Krzysztof</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="856" ind1="4" ind2="0">
<marc:subfield code="u">http://www.doabooks.org/doab?func=fulltext&amp;rid=12789</marc:subfield>
<marc:subfield code="z">Description of rights in Directory of Open Access Books (DOAB): Attribution Non-commercial (CC by-nc)</marc:subfield>
</marc:datafield>
<marc:datafield tag="856" ind1="4" ind2="0">
<marc:subfield code="u">http://www.oapen.org/download?type=document&amp;docid=340074</marc:subfield>
</marc:datafield>
</marc:record>
<marc:record>
<marc:leader>01914 am a 002413u </marc:leader>
<marc:controlfield tag="001">13087</marc:controlfield>
<marc:controlfield tag="005">20120521</marc:controlfield>
<marc:controlfield tag="007">cuuuu---auuuu</marc:controlfield>
<marc:controlfield tag="008">120521s|||| xx o 0 u ||| |</marc:controlfield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="a">9783938616352</marc:subfield>
</marc:datafield>
<marc:datafield tag="100" ind1="1" ind2=" ">
<marc:subfield code="a">Roquette, Peter</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="245" ind1="1" ind2=" ">
<marc:subfield code="a">Helmut Hasse und Emmy Noether ; die Korrespondenz 1925 - 1935.</marc:subfield>
</marc:datafield>
<marc:datafield tag="260" ind1=" " ind2=" ">
<marc:subfield code="b">Universitätsverlag Göttingen</marc:subfield>
<marc:subfield code="c">2006</marc:subfield>
</marc:datafield>
<marc:datafield tag="300" ind1=" " ind2=" ">
<marc:subfield code="a">1 electronic resource ( p.)</marc:subfield>
</marc:datafield>
<marc:datafield tag="520" ind1=" " ind2=" ">
<marc:subfield code="a">This book reproduces...</marc:subfield>
</marc:datafield>
<marc:datafield tag="546" ind1=" " ind2=" ">
<marc:subfield code="a">German</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="0">
<marc:subfield code="a">Science (General)</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="0">
<marc:subfield code="a">Mathematics</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">mathematics</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">correspondence</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Lemmermeyer, Franz</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="856" ind1="4" ind2="0">
<marc:subfield code="u">http://www.doabooks.org/doab?func=fulltext&amp;rid=13087</marc:subfield>
<marc:subfield code="z">Description of rights in Directory of Open Access Books (DOAB): Attribution No Derivatives (CC by-nd)</marc:subfield>
</marc:datafield>
<marc:datafield tag="856" ind1="4" ind2="0">
<marc:subfield code="u">http://www.oapen.org/download?type=document&amp;docid=353934</marc:subfield>
</marc:datafield>
</marc:record>
<marc:record>
<marc:leader>02345 am a 002773u </marc:leader>
<marc:controlfield tag="001">13241</marc:controlfield>
<marc:controlfield tag="005">20120521</marc:controlfield>
<marc:controlfield tag="007">cuuuu---auuuu</marc:controlfield>
<marc:controlfield tag="008">120521s|||| xx o 0 u ||| |</marc:controlfield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="a">9783940344502</marc:subfield>
</marc:datafield>
<marc:datafield tag="100" ind1="1" ind2=" ">
<marc:subfield code="a">Roquette, Peter</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="245" ind1="1" ind2=" ">
<marc:subfield code="a">Emil Artin und Helmut Hasse</marc:subfield>
</marc:datafield>
<marc:datafield tag="260" ind1=" " ind2=" ">
<marc:subfield code="b">Universitätsverlag Göttingen</marc:subfield>
<marc:subfield code="c">2008</marc:subfield>
</marc:datafield>
<marc:datafield tag="300" ind1=" " ind2=" ">
<marc:subfield code="a">1 electronic resource ( p.)</marc:subfield>
</marc:datafield>
<marc:datafield tag="520" ind1=" " ind2=" ">
<marc:subfield code="a">This book contains ...</marc:subfield>
</marc:datafield>
<marc:datafield tag="546" ind1=" " ind2=" ">
<marc:subfield code="a">German</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="0">
<marc:subfield code="a">Mathematics</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">history of mathematics</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">Geschichte der Mathematik</marc:subfield>
</marc:datafield>
<marc:datafield tag="653" ind1=" " ind2=" ">
<marc:subfield code="a">OAPEN</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Lemmermeyer, Franz</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Frei, Günther</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Noether, Emmy</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Hasse, Helmut</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="856" ind1="4" ind2="0">
<marc:subfield code="u">http://www.doabooks.org/doab?func=fulltext&amp;rid=13241</marc:subfield>
<marc:subfield code="z">Description of rights in Directory of Open Access Books (DOAB): Attribution No Derivatives (CC by-nd)</marc:subfield>
</marc:datafield>
<marc:datafield tag="856" ind1="4" ind2="0">
<marc:subfield code="u">http://www.oapen.org/download?type=document&amp;docid=359593</marc:subfield>
</marc:datafield>
</marc:record>
</marc:collection>
答案 0 :(得分:1)
我对你的XSLT进行了一些小的调整,输出非常接近你发布的内容。有两点不同。
文本“This volume is a collection of papers ...”在输出中是“This volume is a ...”,因为“collection of papers”不在输入中。
倒数第3个marc:datafield的子字段被合并在了一起。这是因为它们的@name相同,而current-grouping-key()并不以6开头。不确定这是否正确。
可能有问题的datafield:
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Noether, Emmy</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
<marc:subfield code="a">Hasse, Helmut</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
XSLT 2.0
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:marc="http://www.loc.gov/MARC21/slim">
  <!--
    First revision of the TSV to MARCXML conversion. The TSV named by
    $filePath is read with unparsed-text(); the document element of the XML
    input only serves to start the transformation.
  -->
  <xsl:output method="xml" encoding="UTF-8" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Location of the TSV file. -->
  <xsl:param name="filePath">stack_test-tsv-2.tsv</xsl:param>

  <!-- Entry point: parse the TSV, chunk it into records, emit MARCXML. -->
  <xsl:template match="/*">
    <marc:collection
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">

      <!-- Raw TSV content. -->
      <xsl:variable name="tsv" select="unparsed-text($filePath,'UTF-8')"/>

      <!-- Header row (first match of the line regex), tabs replaced by '|'. -->
      <xsl:variable name="headerRow">
        <xsl:analyze-string select="$tsv" regex="(..*)">
          <xsl:matching-substring>
            <xsl:if test="position() = 1">
              <xsl:value-of select="replace(regex-group(1),'\t','|')"/>
            </xsl:if>
          </xsl:matching-substring>
        </xsl:analyze-string>
      </xsl:variable>
      <xsl:variable name="columnNames" select="tokenize($headerRow,'\|')"/>

      <!-- One <field name="..."> element per non-empty cell below the header.
           Inside the inner analyze-string every character of the row belongs
           to a match, so position() is the column index. -->
      <xsl:variable name="cells">
        <xsl:analyze-string select="$tsv" regex="(..*)">
          <xsl:matching-substring>
            <xsl:if test="position() != 1">
              <xsl:analyze-string select="." regex="([^\t][^\t]*)\t?|\t">
                <xsl:matching-substring>
                  <xsl:variable name="column" select="position()"/>
                  <xsl:if test="regex-group(1) != ''">
                    <field name="{normalize-space($columnNames[$column])}">
                      <xsl:value-of select="regex-group(1)"/>
                    </field>
                  </xsl:if>
                </xsl:matching-substring>
              </xsl:analyze-string>
            </xsl:if>
          </xsl:matching-substring>
        </xsl:analyze-string>
      </xsl:variable>

      <!-- A non-empty leader cell opens a new record; the fields that follow
           it, up to the next leader, go into the same <record>. -->
      <xsl:variable name="records">
        <xsl:for-each-group select="$cells/field"
                            group-starting-with="field[@name='leader'][string()]">
          <record>
            <xsl:sequence select="current-group()"/>
          </record>
        </xsl:for-each-group>
      </xsl:variable>

      <!-- Emit one marc:record per chunk. -->
      <xsl:for-each select="$records/record">
        <marc:record>
          <marc:leader>
            <xsl:apply-templates select="field[@name='leader']"/>
          </marc:leader>
          <xsl:apply-templates select="field[@name = ('001','005','007','008')]"
                               mode="controlfield"/>

          <!-- Column headers of data fields look like TTTIJ$c: tag, two
               indicators ('_' stands for a blank), '$', subfield code. -->
          <xsl:for-each-group select="field[number(substring(@name, 1, 3)) >= 20]"
                              group-adjacent="substring(@name, 1, 3)">
            <xsl:sort select="current-grouping-key()"/>
            <xsl:choose>
              <!-- Tags not starting with 6: the whole run of adjacent cells
                   becomes the subfields of one datafield. -->
              <xsl:when test="not(starts-with(current-grouping-key(),'6'))">
                <marc:datafield tag="{current-grouping-key()}"
                                ind1="{translate(substring(@name, 4, 1), '_', ' ')}"
                                ind2="{translate(substring(@name, 5, 1), '_', ' ')}">
                  <xsl:for-each select="current-group()">
                    <marc:subfield code="{substring(@name, 7, 1)}">
                      <xsl:value-of select="."/>
                    </marc:subfield>
                  </xsl:for-each>
                </marc:datafield>
              </xsl:when>
              <!-- 6xx tags: every cell gets its own datafield. -->
              <xsl:otherwise>
                <xsl:for-each select="current-group()">
                  <marc:datafield tag="{current-grouping-key()}"
                                  ind1="{translate(substring(@name, 4, 1), '_', ' ')}"
                                  ind2="{translate(substring(@name, 5, 1), '_', ' ')}">
                    <marc:subfield code="{substring(@name, 7, 1)}">
                      <xsl:value-of select="."/>
                    </marc:subfield>
                  </marc:datafield>
                </xsl:for-each>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:for-each-group>
        </marc:record>
      </xsl:for-each>
    </marc:collection>
  </xsl:template>

  <!-- Control field: the column header is used as the tag. -->
  <xsl:template match="field" mode="controlfield">
    <marc:controlfield tag="{@name}">
      <xsl:value-of select="."/>
    </marc:controlfield>
  </xsl:template>

  <!-- Default mode (used for the leader): text with whitespace normalized. -->
  <xsl:template match="field">
    <xsl:value-of select="normalize-space()"/>
  </xsl:template>
</xsl:stylesheet>
修改
下面是能够生成所需输出的XSLT更新版本。我的看法是,数据字段的各种变化应当在转换field XML的阶段处理。我认为调整这一阶段的XML处理比尝试修改正则表达式更容易。
XSLT 2.0
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:marc="http://www.loc.gov/MARC21/slim">
  <!--
    Converts a tab-separated export, in which one record may span several
    rows, into a MARCXML collection. The TSV is read from $filePath with
    unparsed-text(); the document element of the XML input only triggers
    the transformation.
  -->
  <xsl:output method="xml" encoding="UTF-8" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- File path parameter. -->
  <xsl:param name="filePath">stack_test-tsv-2.tsv</xsl:param>

  <!-- Main template that parses the TSV and creates structured XML. -->
  <xsl:template match="/*">
    <marc:collection
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">

      <!-- Read in TSV file. -->
      <xsl:variable name="text" select="unparsed-text($filePath,'UTF-8')"/>

      <!-- Header row: the first match of the line regex, with tabs swapped
           for '|' so that it can be tokenized into column names below. -->
      <xsl:variable name="header">
        <xsl:analyze-string select="$text" regex="(..*)">
          <xsl:matching-substring>
            <xsl:if test="position()=1">
              <xsl:value-of select="replace(regex-group(1),'\t','|')"/>
            </xsl:if>
          </xsl:matching-substring>
        </xsl:analyze-string>
      </xsl:variable>
      <xsl:variable name="headerTokens" select="tokenize($header,'\|')"/>

      <!-- Flat list of field elements, one per non-empty cell of every row
           after the header:
             @name  the column header (whitespace-normalized)
             @nbr   the first three characters of the header's leading digit
                    run, i.e. the MARC tag; empty for non-numeric headers
                    such as 'leader'. -->
      <xsl:variable name="recordBody">
        <xsl:analyze-string select="$text" regex="(..*)">
          <xsl:matching-substring>
            <xsl:if test="not(position()=1)">
              <!-- Begin creating the records.
                   Assign column headers to field elements as @name attributes.
                   Every character of the row is consumed by a match (a cell
                   plus its optional trailing tab, or a lone tab for an empty
                   cell), so position() is the column index. -->
              <xsl:analyze-string select="." regex="([^\t][^\t]*)\t?|\t">
                <xsl:matching-substring>
                  <xsl:variable name="pos" select="position()"/>
                  <xsl:variable name="headerToken" select="$headerTokens[$pos]"/>
                  <!-- Empty cells produce no field element. -->
                  <xsl:if test="regex-group(1)[position() = 1]">
                    <field name="{normalize-space($headerToken)}"
                           nbr="{substring(tokenize(normalize-space($headerToken),'[^0-9]')[1],1,3)}">
                      <xsl:value-of select="regex-group(1)"/>
                    </field>
                  </xsl:if>
                </xsl:matching-substring>
              </xsl:analyze-string>
            </xsl:if>
          </xsl:matching-substring>
        </xsl:analyze-string>
      </xsl:variable>

      <!-- Split into record chunks: a non-empty leader starts a new record. -->
      <xsl:variable name="recompile">
        <xsl:for-each-group select="$recordBody/field"
                            group-starting-with="field[@name='leader'][string()]">
          <record>
            <xsl:copy-of select="current-group()"/>
          </record>
        </xsl:for-each-group>
      </xsl:variable>

      <!-- Rebuild MARC record. -->
      <xsl:for-each select="$recompile/record">
        <marc:record>
          <!-- The leader is copied verbatim: it is a fixed-width value, so
               collapsing or trimming its blanks would shift its positions. -->
          <marc:leader>
            <xsl:value-of select="field[@name='leader']"/>
          </marc:leader>

          <!-- Control fields: every column whose header is a bare tag in the
               range 001 to 009 (not only 001/005/007/008), in tag order. -->
          <xsl:apply-templates select="field[matches(@name, '^00[1-9]$')]"
                               mode="controlfield">
            <xsl:sort select="@name"/>
          </xsl:apply-templates>

          <!-- Data fields are built unsorted first and sorted by tag below. -->
          <xsl:variable name="datafields">
            <!-- Tags from 010 upwards are data fields. Fields with an empty
                 @nbr (e.g. the leader) give NaN and are skipped. -->
            <xsl:for-each-group select="field[number(@nbr) >= 10]"
                                group-adjacent="@nbr">
              <!-- Within a run of cells sharing a tag, a new datafield starts
                   each time the run's first column header shows up again, so
                   repeated fields on consecutive rows stay separate. -->
              <xsl:variable name="firstName" select="current-group()[1]/@name"/>
              <xsl:for-each-group select="current-group()"
                                  group-starting-with="*[@name=$firstName]">
                <!-- Headers look like TTTIJ$c: tag, two indicators ('_'
                     stands for a blank), '$', subfield code. -->
                <marc:datafield tag="{@nbr}"
                                ind1="{if (substring(@name, 4, 1) = '_') then ' ' else substring(@name, 4, 1)}"
                                ind2="{if (substring(@name, 5, 1) = '_') then ' ' else substring(@name, 5, 1)}">
                  <xsl:for-each select="current-group()">
                    <marc:subfield code="{substring(@name, 7, 1)}">
                      <xsl:apply-templates select="."/>
                    </marc:subfield>
                  </xsl:for-each>
                </marc:datafield>
              </xsl:for-each-group>
            </xsl:for-each-group>
          </xsl:variable>
          <xsl:perform-sort select="$datafields/*">
            <xsl:sort select="@tag"/>
          </xsl:perform-sort>
        </marc:record>
      </xsl:for-each>
    </marc:collection>
  </xsl:template>

  <!-- Control field: the column header is used as the tag. -->
  <xsl:template match="field" mode="controlfield">
    <marc:controlfield tag="{@name}">
      <xsl:value-of select="."/>
    </marc:controlfield>
  </xsl:template>

  <!-- Default mode (used for subfield values): text with whitespace normalized. -->
  <xsl:template match="field">
    <xsl:value-of select="normalize-space()"/>
  </xsl:template>
</xsl:stylesheet>