如何通过XSLT 2.0从.ODS文件中读取单元块的值

时间:2017-08-08 08:44:31

标签: xml xslt

我希望使用XSLT 2.0转换获取.ods文件/content.xml中的单元格中的值。无论空白单元格如何,我都希望获得列D的值。

可以是任何要迭代的列。

对于所有其他列也是如此,我想将它们放入XML输出中的不同节点。

例如,这个content.xml文件:

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<office:document-content
xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" 
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:msoxl="http://schemas.microsoft.com/office/excel/formula">
<office:font-face-decls>
    <style:font-face style:name="Calibri" svg:font-family="Calibri"/>
</office:font-face-decls>
<office:automatic-styles>
    <style:style style:name="ce1" style:family="table-cell" style:parent-style-name="Default" style:data-style-name="N0"/>
    <style:style style:name="co1" style:family="table-column">
        <style:table-column-properties fo:break-before="auto" style:column-width="1.69333333333333cm"/>
    </style:style>
    <style:style style:name="ro1" style:family="table-row">
        <style:table-row-properties style:row-height="15pt" style:use-optimal-row-height="true" fo:break-before="auto"/>
    </style:style>
    <style:style style:name="ta1" style:family="table" style:master-page-name="mp1">
        <style:table-properties table:display="true" style:writing-mode="lr-tb"/>
    </style:style>
</office:automatic-styles>
<office:body>
    <office:spreadsheet>
        <table:calculation-settings table:case-sensitive="false" table:search-criteria-must-apply-to-whole-cell="false"/>
        <table:table table:name="Folha1" table:style-name="ta1">
            <table:table-column table:style-name="co1" table:number-columns-repeated="16384" table:default-cell-style-name="ce1"/>
            <table:table-row table:style-name="ro1">
                <table:table-cell office:value-type="string" table:style-name="ce1">
                    <text:p>item1</text:p>
                </table:table-cell>
                <table:table-cell table:number-columns-repeated="2" table:style-name="ce1"/>
                <table:table-cell office:value-type="string" table:style-name="ce1">
                    <text:p>item3</text:p>
                </table:table-cell>
                <table:table-cell table:number-columns-repeated="16380" table:style-name="ce1"/>
            </table:table-row>
            <table:table-row table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="16384"/>
            </table:table-row>
            <table:table-row table:style-name="ro1">
                <table:table-cell office:value-type="string" table:style-name="ce1">
                    <text:p>item2</text:p>
                </table:table-cell>
                <table:table-cell table:number-columns-repeated="16383" table:style-name="ce1"/>
            </table:table-row>
            <table:table-row table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="16384"/>
            </table:table-row>
            <table:table-row table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="3" table:style-name="ce1"/>
                <table:table-cell office:value-type="string" table:style-name="ce1">
                    <text:p>item4</text:p>
                </table:table-cell>
                <table:table-cell table:number-columns-repeated="3" table:style-name="ce1"/>
                <table:table-cell office:value-type="string" table:style-name="ce1">
                    <text:p>item5</text:p>
                </table:table-cell>
                <table:table-cell table:number-columns-repeated="16376"/>
            </table:table-row>
            <table:table-row table:number-rows-repeated="5" table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="16384"/>
            </table:table-row>
            <table:table-row table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="7" table:style-name="ce1"/>
                <table:table-cell office:value-type="string" table:style-name="ce1">
                    <text:p>item5</text:p>
                </table:table-cell>
                <table:table-cell table:number-columns-repeated="16376"/>
            </table:table-row>
            <table:table-row table:number-rows-repeated="3" table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="16384"/>
            </table:table-row>
            <table:table-row table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="7" table:style-name="ce1"/>
                <table:table-cell office:value-type="string" table:style-name="ce1">
                    <text:p>item6</text:p>
                </table:table-cell>
                <table:table-cell table:number-columns-repeated="16376"/>
            </table:table-row>
            <table:table-row table:number-rows-repeated="1048561" table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="16384"/>
            </table:table-row>
        </table:table>
        <table:table table:name="Folha2" table:style-name="ta1">
            <table:table-column table:style-name="co1" table:number-columns-repeated="16384" table:default-cell-style-name="ce1"/>
            <table:table-row table:number-rows-repeated="1048576" table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="16384"/>
            </table:table-row>
        </table:table>
        <table:table table:name="Folha3" table:style-name="ta1">
            <table:table-column table:style-name="co1" table:number-columns-repeated="16384" table:default-cell-style-name="ce1"/>
            <table:table-row table:number-rows-repeated="1048576" table:style-name="ro1">
                <table:table-cell table:number-columns-repeated="16384"/>
            </table:table-row>
        </table:table>
    </office:spreadsheet>
</office:body>
</office:document-content>


<?xml version="1.0" encoding="UTF-8"?>

    

基本XSLT示例:

<xsl:template match="office:document-content/office:body/office:spreadsheet//table:table[@table:name='Folha1']">
    <xsl:variable name="description" select="table:table-row/table:table-cell[4]/text:p"/>
    <!--xsl:if test="$description != '.'"-->
    <Description>
        <xsl:value-of select="$description"/>
    </Description>
</xsl:template>

这个例子的输出给了我&#34; item5&#34;,好吧,但我真的不知道这背后的逻辑 - 它是如何计算的等等。

据我所知,在XSLT中,空单元格并未真正计算在内! 我是对的吗? 但不知怎的,.ods文件确实保存了这些空单元格和文档结构,以便下次打开文件时可以检索它。
但是如何?

所以我想知道的是:
我们如何真正选择计算这些项目&#34;空&#34;也可以在单元格中找到每个项目的特定位置。

所需的输出如下:

<group1>
    <Description>item</Description>
    <Description>item2</Description>
<group4>
    <Description>item3</Description>
    <Description>item4</Description>
<group8>
    <Description>item5</Description>
    <Description>item6</Description>
    <Description>item7</Description>

1 个答案:

答案 0 :(得分:1)

我扩展了我的代码以解决此问题。 在this SO answer中,我解释了.ods文件结构的一小部分以及如何查找特定单元格。新代码将其扩展为常见电子表格符号中的单元格块,如

  

A1:A5
  B12:C14,E5:E7

并利用XSLT-2.0功能。我并不认为这是最优雅的解决方案,但迄今为止的测试看起来很有希望。

以下是检索单元格块内容的代码:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0" 
  xmlns:xsl   ="http://www.w3.org/1999/XSL/Transform" 
  xmlns:xs    ="http://www.w3.org/2001/XMLSchema" 
  xmlns:fn    ="http://www.w3.org/2005/xpath-functions"
  xmlns:udf   ="http://user.defined.functions"
  xmlns:table ="urn:oasis:names:tc:opendocument:xmlns:table:1.0" 
  xmlns:text  ="urn:oasis:names:tc:opendocument:xmlns:text:1.0" 
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"> 

    <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
    <!-- Name and location of the content.xml file from the ODS file -->
    <!-- Unzip ODS file to get content.xml -->
    <xsl:variable name="ODSpath" select="'content.xml'" />
    <xsl:variable name="doc" select="document($ODSpath)/office:document-content/office:body/office:spreadsheet/table:table" /> 

    <!-- Alphabet as a sequence -->
    <xsl:variable name="upperCaseLetters" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'" />
    <xsl:variable name="lowerCaseLetters" select="'abcdefghijklmnopqrstuvwxyz'" />
    <xsl:variable name="alphabet" select="'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'" />

    <!-- Helper function: Gets the number of a letter of the alphabet: A=1,B=2,C=3... -->
    <xsl:function name="udf:letter-idx" as="xs:integer">
      <xsl:param    name="coord" as="xs:string"/>
      <xsl:variable name="cnt"   select="string-length($coord)" />
      <xsl:variable name="cur"   select="substring($coord,$cnt,1)" />
      <xsl:value-of select="if ($cnt = 1) then index-of($alphabet,$cur) else udf:letter-idx(substring($coord,1,$cnt - 1))*26 + index-of($alphabet,$cur)"/>
    </xsl:function>

    <!-- Convert coordinate string to separate squares in XML format -->
    <xsl:template name="ConvertCoordinates">
      <xsl:param name="cellStr" as="xs:string" />
      <xsl:analyze-string select="$cellStr" regex="([A-Z]+)([0-9]+):([A-Z]+)([0-9]+),?">
        <xsl:matching-substring>
          <xsl:element name="Item">
            <xsl:element name="FromX"><xsl:value-of select="udf:letter-idx(regex-group(1))" /></xsl:element>
            <xsl:element name="FromY"><xsl:value-of select="regex-group(2)" /></xsl:element>
            <xsl:element name="ToX"><xsl:value-of select="udf:letter-idx(regex-group(3))" /></xsl:element>
            <xsl:element name="ToY"><xsl:value-of select="regex-group(4)" /></xsl:element>
          </xsl:element>
        </xsl:matching-substring>
      </xsl:analyze-string>
    </xsl:template>

    <!-- Get values of every cell in a square -->
    <xsl:template name="VisitAll">    
        <xsl:param name="result" as="element()*" />
        <xsl:param name="xStart" as="xs:integer" />
        <xsl:param name="yStart" as="xs:integer" />
        <xsl:param name="xEnd"   as="xs:integer" />
        <xsl:param name="yEnd"   as="xs:integer" />
        <xsl:param name="curX"   as="xs:integer" select="$xStart" />
        <xsl:param name="curY"   as="xs:integer" select="$yStart" />
        <xsl:choose>
          <!-- Check if xEnd/yEnd has been reached -->
          <xsl:when test="$curY le $yEnd">
            <xsl:variable name="cellNode" as="element()">
              <xsl:call-template name="GetCellValue">
                <xsl:with-param name="x" select="$curX" />
                <xsl:with-param name="y" select="$curY" />
              </xsl:call-template>
            </xsl:variable>    
            <!-- Recurse to next cell x+1/y (next column) or x=xStart/y+1 (next row) -->
            <xsl:call-template name="VisitAll">
              <xsl:with-param name="result" select="if ($cellNode/text()!='') then ($result,$cellNode) else $result" />
              <xsl:with-param name="xStart" select="$xStart" />
              <xsl:with-param name="yStart" select="$yStart" />
              <xsl:with-param name="xEnd" select="$xEnd" />
              <xsl:with-param name="yEnd" select="$yEnd" />
              <xsl:with-param name="curX" select="if ($curX = $xEnd) then $xStart else $curX + 1" />
              <xsl:with-param name="curY" select="if ($curX = $xEnd) then $curY + 1 else $curY" />
            </xsl:call-template>
          </xsl:when>
          <xsl:otherwise>
            <!-- Recursion end - return result -->
            <xsl:sequence select="$result" />
          </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

    <!-- Get the value of one cell at pos x/y -->
    <xsl:template name="GetCellValue">
      <xsl:param name="x" as="xs:integer" />
      <xsl:param name="y" as="xs:integer" />
      <xsl:variable name="targetRow" select="$doc/table:table-row[sum(preceding-sibling::*/@table:number-rows-repeated) + position() - count(preceding-sibling::*/@table:number-rows-repeated)= $y]" />
      <xsl:variable name="targetCell" select="$targetRow/table:table-cell[sum(preceding-sibling::*/@table:number-columns-repeated) + position() - count(preceding-sibling::*/@table:number-columns-repeated) le $x]" />
      <xsl:variable name="targetCellValue" select="$targetCell[last()]/text:p/text()" />
      <!-- create namespace-free element with result value -->
      <xsl:element name="Text">
        <xsl:value-of select="$targetCellValue" />
      </xsl:element>
    </xsl:template>

    <!-- Gets arbitrary number of cells specified in squares (typical for Excel/Office) --> 
    <!-- For example: A4:B7,E4:E7 -->
    <xsl:template name="GetMultipleCellsNodes">
      <xsl:param name="cells" />
      <!-- Convert cell coord string to coordinate element sets and convert lowercase to uppercase -->
      <xsl:variable name="squares">
        <xsl:call-template name="ConvertCoordinates">
          <xsl:with-param name="cellStr" select="translate($cells,$lowerCaseLetters,$upperCaseLetters)" />
        </xsl:call-template>       
      </xsl:variable>
      <!-- Call VisitAll on all squares in each coordinate element set and concatenate to sequence -->
      <xsl:variable name="itemSequence" as="element()*">
        <xsl:for-each select="$squares/Item">
          <!-- Visit every cell in each square -->
          <xsl:call-template name="VisitAll">
            <xsl:with-param name="result" select="()" />
            <xsl:with-param name="xStart" select="FromX" />
            <xsl:with-param name="yStart" select="FromY" />
            <xsl:with-param name="xEnd" select="ToX" />
            <xsl:with-param name="yEnd" select="ToY" />
          </xsl:call-template>
        </xsl:for-each>
      </xsl:variable>
      <xsl:sequence select="$itemSequence" />
    </xsl:template>

    <!-- Demo template -->
    <xsl:template match="/">
      <xsl:variable name="selectedElements">
        <xsl:call-template name="GetMultipleCellsNodes">
          <xsl:with-param name="cells" select="'A1:A20,D1:D20,H1:h20'" />
        </xsl:call-template>       
      </xsl:variable>
      <xsl:element name="Cells">
        <xsl:for-each select="$selectedElements">
          <xsl:copy-of select="." />
        </xsl:for-each>
      </xsl:element>
    </xsl:template>

</xsl:stylesheet>

content.xml文件的名称在开头的ODSpath变量中指定。因此,源文件不会传递给XSLT处理器,而是传递给包含其名称的变量。在底部的演示模板中使用字符串A1:A20,D1:D20,H1:h20指定所需的单元格/列。

我用上面的content.xml测试了这个失败,因为 LibreOffice 表示文件已损坏(???)。但是(重新)使用图像中的内容创建文件成功并产生以下输出:

<?xml version="1.0" encoding="UTF-8"?>
<Cells>
   <Text>item1</Text>
   <Text>item2</Text>
   <Text>item3</Text>
   <Text>item4</Text>
   <Text>item5</Text>
   <Text>item6</Text>
   <Text>item7</Text>
</Cells>

要获得上述确切结果,只需将这三列拆分为单独调用GetMultipleCellsNodes

<!-- Demo template 2 -->
<xsl:template match="/">
  <xsl:variable name="group1">
    <xsl:call-template name="GetMultipleCellsNodes">
      <xsl:with-param name="cells" select="'A1:A20'" />
    </xsl:call-template>       
  </xsl:variable>
  <xsl:variable name="group4">
    <xsl:call-template name="GetMultipleCellsNodes">
      <xsl:with-param name="cells" select="'D1:D20'" />
    </xsl:call-template>       
  </xsl:variable>
  <xsl:variable name="group8">
    <xsl:call-template name="GetMultipleCellsNodes">
      <xsl:with-param name="cells" select="'H1:h20'" />
    </xsl:call-template>       
  </xsl:variable>
  <xsl:element name="group1">
    <xsl:for-each select="$group1/Text">
      <xsl:element name="Description"><xsl:value-of select="." /></xsl:element>
    </xsl:for-each>
  </xsl:element>
  <xsl:element name="group4">
    <xsl:for-each select="$group4/Text">
      <xsl:element name="Description"><xsl:value-of select="." /></xsl:element>
    </xsl:for-each>
  </xsl:element>
  <xsl:element name="group8">
    <xsl:for-each select="$group8/Text">
      <xsl:element name="Description"><xsl:value-of select="." /></xsl:element>
    </xsl:for-each>
  </xsl:element>
</xsl:template>    

此演示模板的输出是:

<?xml version="1.0" encoding="UTF-8"?>
<group1>
   <Description>item1</Description>
   <Description>item2</Description>
</group1>
<group4>
   <Description>item3</Description>
   <Description>item4</Description>
</group4>
<group8>
   <Description>item5</Description>
   <Description>item6</Description>
   <Description>item7</Description>
</group8>