将XML节点值解析为字符串

时间:2012-09-23 07:29:51

标签: java xml java-ee xpath xml-parsing

我的xml文件看起来像这样。我希望得到节点文本内容,如下所示。

<property regex=".*" xpath=".*">
     <value>
          127.0.0.1
     </value>
<property regex=".*" xpath=".*">
<value>

</value>
</property>

我希望按照文件中指定的顺序获取文字。这是我的java代码。

// Load the document and walk over every <property> element it contains.
Document doc = parseDocument("properties.xml");
NodeList properties = doc.getElementsByTagName("property");
final int count = properties.getLength();
int index = 0;
while (index < count) {
    Element property = (Element) properties.item(index);
    // Open question: how do I get the text of this property's <value> from here?
    index++;
}

预期输出:

 Node 1 : 127.0.0.1

请提出您的意见。

4 个答案:

答案 0 :(得分:3)

以下方法查找文档中的所有property元素,并收集这些元素下所有名为value的直接子元素。

  /**
   * Collects the {@code value} elements that are direct children of any
   * {@code property} element in the document.
   *
   * @param document the parsed DOM document to search
   * @return the matching elements, property by property and in child order
   *         within each property; empty when nothing matches
   */
  private static List<Element> getValueElements(Document document) {
    List<Element> values = new ArrayList<Element>();
    NodeList properties = document.getElementsByTagName("property");
    for (int p = 0; p < properties.getLength(); p++) {
      Node candidate = properties.item(p);
      if (candidate instanceof Element) {
        // Only direct children are inspected, so a value sitting deeper in the
        // tree is picked up through its own parent property, not an ancestor.
        for (Node kid = candidate.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
          if (kid instanceof Element && "value".equals(kid.getNodeName())) {
            values.add((Element) kid);
          }
        }
      }
    }
    return values;
  }

但是你可以使用XPath表达式//property/value以更优雅的方式做同样的事情:

/**
 * Collects every {@code value} element whose parent is a {@code property}
 * element by evaluating the XPath expression {@code //property/value}.
 *
 * @param document the parsed DOM document to query
 * @return the matching elements, in the order the resulting node set yields them
 * @throws XPathExpressionException if the expression cannot be compiled or evaluated
 */
private static List<Element> getValueElementsUsingXpath(Document document) throws XPathExpressionException {
  XPathExpression valueQuery = XPathFactory.newInstance().newXPath().compile("//property/value");
  NodeList matches = (NodeList) valueQuery.evaluate(document, XPathConstants.NODESET);

  List<Element> elements = new ArrayList<Element>();
  int total = matches.getLength();
  for (int idx = 0; idx < total; idx++) {
    Node match = matches.item(idx);
    // Keep only element nodes from the result set.
    if (match instanceof Element) {
      elements.add((Element) match);
    }
  }
  return elements;
}

您可以像下面这样使用上述方法:

  /**
   * Parses {@code properties.xml}, collects its value elements and prints one
   * numbered line per element, counting from 1.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if parsing or formatting fails
   */
  public static void main(String[] args) throws Exception {
    Document doc = parseDocument("properties.xml");
    // getValueElementsUsingXpath(doc) can be used here instead.
    List<Element> valueElements = getValueElements(doc);

    for (int idx = 0; idx < valueElements.size(); idx++) {
      System.out.println("Node " + (idx + 1) + ": " + formatValueElement(valueElements.get(idx)));
    }
  }

  /**
   * Joins the character data held directly inside an element into one line.
   *
   * <p>Each text or CDATA child is trimmed on its own; children that are empty
   * after trimming are dropped and the remaining pieces are joined with a
   * single space. Child nodes of any other type (nested elements, comments,
   * ...) are ignored.
   *
   * @param element the element whose direct text/CDATA children are formatted
   * @return the joined text, or an empty string when there is none
   */
  private static String formatValueElement(Element element) {
    // A local, single-threaded buffer does not need StringBuffer's locking.
    StringBuilder result = new StringBuilder();

    NodeList children = element.getChildNodes();
    for (int i = 0, len = children.getLength(); i < len; i++) {
      Node child = children.item(i);

      short type = child.getNodeType();
      if (type != Node.TEXT_NODE && type != Node.CDATA_SECTION_NODE) {
        continue;
      }

      String childText = child.getTextContent().trim();
      if (childText.isEmpty()) {
        continue;
      }

      // Only non-empty pieces are appended, so a non-empty buffer means at
      // least one piece precedes this one and a separator is required.
      if (result.length() > 0) {
        result.append(' ');
      }
      result.append(childText);
    }

    return result.toString();
  }

我使用以下两个XML输入对其进行了测试,因为您的XML缺少结束</property>标记。

这是第一个(我添加了一些额外的元素,以表明它们不会被匹配到):

  <rootNode>
  <property regex=".*" xpath=".*">
       <value>
            127.0.0.1
       </value>
       <anythingElse>Text here</anythingElse>
  </property>
  <anythingElse>Text here</anythingElse>
  <property regex=".*" xpath=".*">
  <value>
       val <![CDATA[
       <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/>
       ]]> test
  </value>
  </property>
  </rootNode>

第二个具有嵌套的property元素(我在末尾补上了缺少的结束标记):

  <property regex=".*" xpath=".*">
      <value>
          127.0.0.1
      </value>
      <property regex=".*" xpath=".*">
      <value>
          val <![CDATA[
          <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/>
          ]]> test
      </value>
      </property>
  </property>

答案 1 :(得分:0)

// Print the text of the first <value> found under each <property>.
Document doc = parseDocument("properties.xml");
NodeList properties = doc.getElementsByTagName("property");
final int count = properties.getLength();
for (int idx = 0; idx < count; idx++) {
    Element property = (Element) properties.item(idx);
    Element value = (Element) property.getElementsByTagName("value").item(0);
    // Properties without any <value> are skipped; they still consume a number.
    if (value == null) {
        continue;
    }
    System.out.println("Node " + (idx + 1) + ": " + value.getTextContent());
}

这应该可以帮助您访问元素的内容。请注意,如果您想得到与问题中贴出的完全一致的结果,可能还需要去除前导和尾随空白。

答案 2 :(得分:0)

逐个读取节点值:

    /**
     * Parses the given XML and prints the trimmed text of every {@code value}
     * element below the root, one line per element, formatted as
     * {@code Node <n>: <text>} with {@code n} starting at 1.
     *
     * <p>Only text and CDATA children of a {@code value} element contribute to
     * its printed text.
     *
     * @param xml the XML document as a string
     * @throws Exception if {@code parseXml} cannot parse the input
     */
    private static void printValues(String xml) throws Exception {
        Element element = parseXml(xml);

        NodeList values = element.getElementsByTagName("value");
        for (int i = 0; i < values.getLength(); i++) {
            NodeList vals = values.item(i).getChildNodes();

            StringBuilder value = new StringBuilder();
            for (int j = 0; j < vals.getLength(); j++) {
                Node part = vals.item(j);
                short type = part.getNodeType();
                // getNodeValue() is null for element children; appending it
                // blindly would put the literal text "null" into the output.
                if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                    value.append(part.getNodeValue());
                }
            }

            // Numbering is 1-based so the first line reads "Node 1".
            System.out.println("Node " + (i + 1) + ": " + value.toString().trim());
        }
    }

/**
 * Parses an XML document held in a string and returns its root element after
 * normalizing it.
 *
 * <p>The text is handed to the parser as a character stream. Converting it to
 * bytes first would let an {@code encoding="..."} declaration inside the text
 * decide how those bytes are decoded, which corrupts non-ASCII characters
 * whenever the declared encoding differs from the one used for the conversion.
 *
 * @param source the XML text to parse
 * @return the normalized document element
 * @throws Exception if the parser cannot be created or the text is not well-formed XML
 */
public static Element parseXml(String source) throws Exception{
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    // Fully qualified so the snippet needs no additional import.
    Document doc = dBuilder.parse(new InputSource(new java.io.StringReader(source)));
    Element element = doc.getDocumentElement();
    element.normalize();

    return element;
}

答案 3 :(得分:0)

您可以使用XSLT以另一种方式解决此问题。这是Java代码:

/**
 * Applies the XSLT stylesheet to the XML file and writes the transformation
 * result to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws TransformerException if the stylesheet cannot be loaded or the transformation fails
 */
public static void main(String args[]) throws TransformerException{
    Source input = new StreamSource(new File("/path/to/xml"));
    Source stylesheet = new StreamSource(new File("/path/to/xsl"));

    Transformer transformer = TransformerFactory.newInstance().newTransformer(stylesheet);
    transformer.transform(input, new StreamResult(System.out));
}

这是样式表文件:

<?xml version="1.0" encoding="ISO-8859-1"?>

<!-- Prints one line per property element: its position among its sibling
     property elements, a space, and the whitespace-normalized text of its
     value child element(s). -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

    <xsl:output method="text" omit-xml-declaration="yes" />

    <!-- Start from the property children of the document element. -->
    <xsl:template match="/">
        <xsl:apply-templates select="*/property"/>
    </xsl:template>

    <xsl:template match="property">
        <xsl:number /> 
        <xsl:text> </xsl:text>
        <!-- Select only the value children: selecting node() would also print
             the text of sibling elements such as anythingElse. -->
        <xsl:apply-templates select="value" />
        <xsl:text>&#xa;</xsl:text>
    </xsl:template>

    <!-- An empty normalized string simply produces no output, so no guard is needed. -->
    <xsl:template match="value">
        <xsl:value-of select="normalize-space(.)" />
    </xsl:template>

</xsl:stylesheet>

应用于此输入时:

  <root>
  <property regex=".*" xpath=".*">
       <value>
            127.0.0.1
       </value>
       <anythingElse>Text here</anythingElse>
  </property>
  <property regex=".*" xpath=".*">
  <value>
       val <![CDATA[
       <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/>
       ]]> test
  </value>
  </property>
  </root>

将产生以下输出:

1 127.0.0.1
2 val <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/> test