我希望使用XSLT 2.0转换获取.ods
文件/content.xml
中的单元格中的值。无论空白单元格如何,我都希望获得列D
的值。
可以是任何要迭代的列。
对于所有其他列也是如此,我想将它们放入XML输出中的不同节点。
例如,这个content.xml
文件:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<office:document-content
xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:msoxl="http://schemas.microsoft.com/office/excel/formula">
<office:font-face-decls>
<style:font-face style:name="Calibri" svg:font-family="Calibri"/>
</office:font-face-decls>
<office:automatic-styles>
<style:style style:name="ce1" style:family="table-cell" style:parent-style-name="Default" style:data-style-name="N0"/>
<style:style style:name="co1" style:family="table-column">
<style:table-column-properties fo:break-before="auto" style:column-width="1.69333333333333cm"/>
</style:style>
<style:style style:name="ro1" style:family="table-row">
<style:table-row-properties style:row-height="15pt" style:use-optimal-row-height="true" fo:break-before="auto"/>
</style:style>
<style:style style:name="ta1" style:family="table" style:master-page-name="mp1">
<style:table-properties table:display="true" style:writing-mode="lr-tb"/>
</style:style>
</office:automatic-styles>
<office:body>
<office:spreadsheet>
<table:calculation-settings table:case-sensitive="false" table:search-criteria-must-apply-to-whole-cell="false"/>
<table:table table:name="Folha1" table:style-name="ta1">
<table:table-column table:style-name="co1" table:number-columns-repeated="16384" table:default-cell-style-name="ce1"/>
<table:table-row table:style-name="ro1">
<table:table-cell office:value-type="string" table:style-name="ce1">
<text:p>item1</text:p>
</table:table-cell>
<table:table-cell table:number-columns-repeated="2" table:style-name="ce1"/>
<table:table-cell office:value-type="string" table:style-name="ce1">
<text:p>item3</text:p>
</table:table-cell>
<table:table-cell table:number-columns-repeated="16380" table:style-name="ce1"/>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:number-columns-repeated="16384"/>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell office:value-type="string" table:style-name="ce1">
<text:p>item2</text:p>
</table:table-cell>
<table:table-cell table:number-columns-repeated="16383" table:style-name="ce1"/>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:number-columns-repeated="16384"/>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:number-columns-repeated="3" table:style-name="ce1"/>
<table:table-cell office:value-type="string" table:style-name="ce1">
<text:p>item4</text:p>
</table:table-cell>
<table:table-cell table:number-columns-repeated="3" table:style-name="ce1"/>
<table:table-cell office:value-type="string" table:style-name="ce1">
<text:p>item5</text:p>
</table:table-cell>
<table:table-cell table:number-columns-repeated="16376"/>
</table:table-row>
<table:table-row table:number-rows-repeated="5" table:style-name="ro1">
<table:table-cell table:number-columns-repeated="16384"/>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:number-columns-repeated="7" table:style-name="ce1"/>
<table:table-cell office:value-type="string" table:style-name="ce1">
<text:p>item5</text:p>
</table:table-cell>
<table:table-cell table:number-columns-repeated="16376"/>
</table:table-row>
<table:table-row table:number-rows-repeated="3" table:style-name="ro1">
<table:table-cell table:number-columns-repeated="16384"/>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:number-columns-repeated="7" table:style-name="ce1"/>
<table:table-cell office:value-type="string" table:style-name="ce1">
<text:p>item6</text:p>
</table:table-cell>
<table:table-cell table:number-columns-repeated="16376"/>
</table:table-row>
<table:table-row table:number-rows-repeated="1048561" table:style-name="ro1">
<table:table-cell table:number-columns-repeated="16384"/>
</table:table-row>
</table:table>
<table:table table:name="Folha2" table:style-name="ta1">
<table:table-column table:style-name="co1" table:number-columns-repeated="16384" table:default-cell-style-name="ce1"/>
<table:table-row table:number-rows-repeated="1048576" table:style-name="ro1">
<table:table-cell table:number-columns-repeated="16384"/>
</table:table-row>
</table:table>
<table:table table:name="Folha3" table:style-name="ta1">
<table:table-column table:style-name="co1" table:number-columns-repeated="16384" table:default-cell-style-name="ce1"/>
<table:table-row table:number-rows-repeated="1048576" table:style-name="ro1">
<table:table-cell table:number-columns-repeated="16384"/>
</table:table-row>
</table:table>
</office:spreadsheet>
</office:body>
</office:document-content>
<?xml version="1.0" encoding="UTF-8"?>
基本XSLT示例:
<xsl:template match="office:document-content/office:body/office:spreadsheet//table:table[@table:name='Folha1']">
<xsl:variable name="description" select="table:table-row/table:table-cell[4]/text:p"/>
<!--xsl:if test="$description != '.'"-->
<Description>
<xsl:value-of select="$description"/>
</Description>
</xsl:template>
这个例子的输出给了我&#34; item5&#34;,好吧,但我真的不知道这背后的逻辑 - 它是如何计算的等等。
据我所知,在XSLT中,空单元格并未真正计算在内!
我是对的吗?
但不知怎的,.ods
文件确实保存了这些空单元格和文档结构,以便下次打开文件时可以检索它。
但是如何?
所以我想知道的是:
我们如何真正选择计算这些项目&#34;空&#34;也可以在单元格中找到每个项目的特定位置。
所需的输出如下:
<group1>
<Description>item</Description>
<Description>item2</Description>
<group4>
<Description>item3</Description>
<Description>item4</Description>
<group8>
<Description>item5</Description>
<Description>item6</Description>
<Description>item7</Description>
答案 0 :(得分:1)
我扩展了我的代码以解决此问题。
在this SO answer中,我解释了.ods
文件结构的一小部分以及如何查找特定单元格。新代码将其扩展为常见电子表格符号中的单元格块,如
A1:A5
B12:C14,E5:E7
并利用XSLT-2.0功能。我并不认为这是最优雅的解决方案,但迄今为止的测试看起来很有希望。
以下是检索单元格块内容的代码:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl ="http://www.w3.org/1999/XSL/Transform"
xmlns:xs ="http://www.w3.org/2001/XMLSchema"
xmlns:fn ="http://www.w3.org/2005/xpath-functions"
xmlns:udf ="http://user.defined.functions"
xmlns:table ="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
xmlns:text ="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<!-- Name and location of the content.xml file from the ODS file -->
<!-- Unzip ODS file to get content.xml -->
<xsl:variable name="ODSpath" select="'content.xml'" />
<xsl:variable name="doc" select="document($ODSpath)/office:document-content/office:body/office:spreadsheet/table:table" />
<!-- Alphabet as a sequence -->
<xsl:variable name="upperCaseLetters" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'" />
<xsl:variable name="lowerCaseLetters" select="'abcdefghijklmnopqrstuvwxyz'" />
<xsl:variable name="alphabet" select="'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'" />
<!-- Helper function: Gets the number of a letter of the alphabet: A=1,B=2,C=3... -->
<xsl:function name="udf:letter-idx" as="xs:integer">
<xsl:param name="coord" as="xs:string"/>
<xsl:variable name="cnt" select="string-length($coord)" />
<xsl:variable name="cur" select="substring($coord,$cnt,1)" />
<xsl:value-of select="if ($cnt = 1) then index-of($alphabet,$cur) else udf:letter-idx(substring($coord,1,$cnt - 1))*26 + index-of($alphabet,$cur)"/>
</xsl:function>
<!-- Convert coordinate string to separate squares in XML format -->
<xsl:template name="ConvertCoordinates">
<xsl:param name="cellStr" as="xs:string" />
<xsl:analyze-string select="$cellStr" regex="([A-Z]+)([0-9]+):([A-Z]+)([0-9]+),?">
<xsl:matching-substring>
<xsl:element name="Item">
<xsl:element name="FromX"><xsl:value-of select="udf:letter-idx(regex-group(1))" /></xsl:element>
<xsl:element name="FromY"><xsl:value-of select="regex-group(2)" /></xsl:element>
<xsl:element name="ToX"><xsl:value-of select="udf:letter-idx(regex-group(3))" /></xsl:element>
<xsl:element name="ToY"><xsl:value-of select="regex-group(4)" /></xsl:element>
</xsl:element>
</xsl:matching-substring>
</xsl:analyze-string>
</xsl:template>
<!-- Get values of every cell in a square -->
<xsl:template name="VisitAll">
<xsl:param name="result" as="element()*" />
<xsl:param name="xStart" as="xs:integer" />
<xsl:param name="yStart" as="xs:integer" />
<xsl:param name="xEnd" as="xs:integer" />
<xsl:param name="yEnd" as="xs:integer" />
<xsl:param name="curX" as="xs:integer" select="$xStart" />
<xsl:param name="curY" as="xs:integer" select="$yStart" />
<xsl:choose>
<!-- Check if xEnd/yEnd has been reached -->
<xsl:when test="$curY le $yEnd">
<xsl:variable name="cellNode" as="element()">
<xsl:call-template name="GetCellValue">
<xsl:with-param name="x" select="$curX" />
<xsl:with-param name="y" select="$curY" />
</xsl:call-template>
</xsl:variable>
<!-- Recurse to next cell x+1/y (next column) or x=xStart/y+1 (next row) -->
<xsl:call-template name="VisitAll">
<xsl:with-param name="result" select="if ($cellNode/text()!='') then ($result,$cellNode) else $result" />
<xsl:with-param name="xStart" select="$xStart" />
<xsl:with-param name="yStart" select="$yStart" />
<xsl:with-param name="xEnd" select="$xEnd" />
<xsl:with-param name="yEnd" select="$yEnd" />
<xsl:with-param name="curX" select="if ($curX = $xEnd) then $xStart else $curX + 1" />
<xsl:with-param name="curY" select="if ($curX = $xEnd) then $curY + 1 else $curY" />
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<!-- Recursion end - return result -->
<xsl:sequence select="$result" />
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!-- Get the value of one cell at pos x/y -->
<xsl:template name="GetCellValue">
<xsl:param name="x" as="xs:integer" />
<xsl:param name="y" as="xs:integer" />
<xsl:variable name="targetRow" select="$doc/table:table-row[sum(preceding-sibling::*/@table:number-rows-repeated) + position() - count(preceding-sibling::*/@table:number-rows-repeated)= $y]" />
<xsl:variable name="targetCell" select="$targetRow/table:table-cell[sum(preceding-sibling::*/@table:number-columns-repeated) + position() - count(preceding-sibling::*/@table:number-columns-repeated) le $x]" />
<xsl:variable name="targetCellValue" select="$targetCell[last()]/text:p/text()" />
<!-- create namespace-free element with result value -->
<xsl:element name="Text">
<xsl:value-of select="$targetCellValue" />
</xsl:element>
</xsl:template>
<!-- Gets arbitrary number of cells specified in squares (typical for Excel/Office) -->
<!-- For example: A4:B7,E4:E7 -->
<xsl:template name="GetMultipleCellsNodes">
<xsl:param name="cells" />
<!-- Convert cell coord string to coordinate element sets and convert lowercase to uppercase -->
<xsl:variable name="squares">
<xsl:call-template name="ConvertCoordinates">
<xsl:with-param name="cellStr" select="translate($cells,$lowerCaseLetters,$upperCaseLetters)" />
</xsl:call-template>
</xsl:variable>
<!-- Call VisitAll on all squares in each coordinate element set and concatenate to sequence -->
<xsl:variable name="itemSequence" as="element()*">
<xsl:for-each select="$squares/Item">
<!-- Visit every cell in each square -->
<xsl:call-template name="VisitAll">
<xsl:with-param name="result" select="()" />
<xsl:with-param name="xStart" select="FromX" />
<xsl:with-param name="yStart" select="FromY" />
<xsl:with-param name="xEnd" select="ToX" />
<xsl:with-param name="yEnd" select="ToY" />
</xsl:call-template>
</xsl:for-each>
</xsl:variable>
<xsl:sequence select="$itemSequence" />
</xsl:template>
<!-- Demo template -->
<xsl:template match="/">
<xsl:variable name="selectedElements">
<xsl:call-template name="GetMultipleCellsNodes">
<xsl:with-param name="cells" select="'A1:A20,D1:D20,H1:h20'" />
</xsl:call-template>
</xsl:variable>
<xsl:element name="Cells">
<xsl:for-each select="$selectedElements">
<xsl:copy-of select="." />
</xsl:for-each>
</xsl:element>
</xsl:template>
</xsl:stylesheet>
content.xml
文件的名称在开头的ODSpath
变量中指定。因此,源文件不会传递给XSLT处理器,而是传递给包含其名称的变量。在底部的演示模板中使用字符串A1:A20,D1:D20,H1:h20
指定所需的单元格/列。
我用上面的content.xml
测试了这个失败,因为 LibreOffice 表示文件已损坏(???)。但是(重新)使用图像中的内容创建文件成功并产生以下输出:
<?xml version="1.0" encoding="UTF-8"?>
<Cells>
<Text>item1</Text>
<Text>item2</Text>
<Text>item3</Text>
<Text>item4</Text>
<Text>item5</Text>
<Text>item6</Text>
<Text>item7</Text>
</Cells>
要获得上述确切结果,只需将这三列拆分为单独调用GetMultipleCellsNodes
:
<!-- Demo template 2 -->
<xsl:template match="/">
<xsl:variable name="group1">
<xsl:call-template name="GetMultipleCellsNodes">
<xsl:with-param name="cells" select="'A1:A20'" />
</xsl:call-template>
</xsl:variable>
<xsl:variable name="group4">
<xsl:call-template name="GetMultipleCellsNodes">
<xsl:with-param name="cells" select="'D1:D20'" />
</xsl:call-template>
</xsl:variable>
<xsl:variable name="group8">
<xsl:call-template name="GetMultipleCellsNodes">
<xsl:with-param name="cells" select="'H1:h20'" />
</xsl:call-template>
</xsl:variable>
<xsl:element name="group1">
<xsl:for-each select="$group1/Text">
<xsl:element name="Description"><xsl:value-of select="." /></xsl:element>
</xsl:for-each>
</xsl:element>
<xsl:element name="group4">
<xsl:for-each select="$group4/Text">
<xsl:element name="Description"><xsl:value-of select="." /></xsl:element>
</xsl:for-each>
</xsl:element>
<xsl:element name="group8">
<xsl:for-each select="$group8/Text">
<xsl:element name="Description"><xsl:value-of select="." /></xsl:element>
</xsl:for-each>
</xsl:element>
</xsl:template>
此演示模板的输出是:
<?xml version="1.0" encoding="UTF-8"?>
<group1>
<Description>item1</Description>
<Description>item2</Description>
</group1>
<group4>
<Description>item3</Description>
<Description>item4</Description>
</group4>
<group8>
<Description>item5</Description>
<Description>item6</Description>
<Description>item7</Description>
</group8>