Java - 使用XPATH进行XML解析

时间:2016-04-11 15:12:43

标签: java xml dom xpath sax

我有XML:

<Table>
   <Row ss:Index="74" ss:AutoFitHeight="0" ss:Height="14">
     <Cell ss:Index="1" ss:MergeAcross="3" ss:StyleID="s29">
       <ss:Data ss:Type="Number" xmlns="http://www.w3.org/TR/REC-html40">
       0.00
       </ss:Data>
     </Cell>
    <Cell ss:Index="15" ss:MergeAcross="5" ss:StyleID="s29">
       <ss:Data ss:Type="Number" xmlns="http://www.w3.org/TR/REC-html40">
       4.57
       </ss:Data>
    </Cell>
  </Row>

这是用于提取内容的代码,例如。 &#34; 0.00&#34;,基于行索引&amp;细胞指数:

public static String getCellValueNum(String filename, int rowIdx, int colIdx) {
    // search for Table element anywhere in the source
    String tableElementPattern = "//*[name()='Table']";
    // search for Row element with given number
    String rowPattern = String.format("/*[name()='Row' and @ss:Index='%d']", rowIdx) ;
    // search for Cell element with given column number
    String cellPattern = String.format("/*[name()='Cell' and @ss:Index='%d']", colIdx) ;  
    // search for element that has ss:Type="String" attribute, search for element with text under it and get text name
    String cellStringContent = "/*[@ss:Type='Number']/*[text()]/text()";  
    String completePattern = tableElementPattern + rowPattern + cellPattern + cellStringContent;

    try (FileReader reader = new FileReader(filename)) {
        XPath xPath = getXpathProcessor();
        Node n = (Node)xPath.compile(completePattern)
        .evaluate(new InputSource(reader), XPathConstants.NODE);
        if (n.getNodeType() == Node.TEXT_NODE) {
            return n.getNodeValue().trim();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}

private static XPath getXpathProcessor() {
    // this is where the custom implementation of NamespaceContext is used
    NamespaceContext context = new NamespaceContextMap(
        "html", "http://www.w3.org/TR/REC-html40", 
        //"xsl", "http://www.w3.org/1999/XSL/Transform",
        "o", "urn:schemas-microsoft-com:office:office",
        "x", "urn:schemas-microsoft-com:office:excel",
        "ss", "urn:schemas-microsoft-com:office:spreadsheet");
    XPath xpath =  XPathFactory.newInstance().newXPath();
    xpath.setNamespaceContext(context);
    return xpath;
}

在以下情况下完全正常:Type =&#39; String&#39;&#39;,但是当ss:Type =&#39; Number&#39;它给出了错误:

java.lang.NullPointerException
at XpathBill.getCellValueNum(XpathBill.java:55)
at XpathBill.main(XpathBill.java:100)

我想在这里:

if (n.getNodeType() == Node.TEXT_NODE)

它应该是其他东西而不是TEXT_NODE,我尝试了其他NodeType命名常量,但它没有用。

请帮助。 谢谢!

0 个答案:

没有答案