获取所有XML节点的路径

时间:2016-03-23 13:37:17

标签: java xml

我有一个XML示例(见下文)。

我的目标是得到一份输出,显示XML中每个元素的路径,例如 message/metadata/msg_id、message/metadata/client_type 等。

示例XML如下:

<message>
  <metadata> 
    <msg_id>1</msg_id>
    <client_type>type1</client_type>
  </metadata>
  <individual>
    <name>John</name>
    <surname>Smith</surname>
      <additional_information>
        <e_mail>aaa@gmail.com</e_mail>
        <phone_number>11110000</phone_number>
      </additional_information>
  </individual>
</message>

等等。我怎样才能在Java中处理这个问题?

提前感谢任何提示!

2 个答案:

答案 0 :(得分:4)

您可以使用XPath表达式选择节点,然后打印每个节点的路径。

这是java代码:

/**
 * Parses {@code src/main/resources/file.xml}, selects every element that has
 * no child elements, and prints the path of each one to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file cannot be parsed or the XPath query fails
 */
public static void main(String[] args) throws Exception {
    File xmlFile = new File("src/main/resources/file.xml");
    XPath xpathEngine = XPathFactory.newInstance().newXPath();
    // Matches every element that has no element children, i.e. the leaves.
    String leafQuery = "//*[not(*)]";

    DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    Document dom = parser.parse(xmlFile);
    dom.getDocumentElement().normalize();

    NodeList leaves =
            (NodeList) xpathEngine.compile(leafQuery).evaluate(dom, XPathConstants.NODESET);
    int position = 0;
    while (position < leaves.getLength()) {
        System.out.println(getXPath(leaves.item(position)));
        position++;
    }
}

/**
 * Builds a slash-separated path made of the node names from the topmost
 * ancestor down to {@code node}.
 *
 * <p>The walk continues until a node without a parent is reached, so for a
 * node attached to a document the first segment is the document node's own
 * name.
 *
 * @param node the node whose path is wanted
 * @return the names of all ancestors and of {@code node}, joined with "/"
 */
private static String getXPath(Node node) {
    StringBuilder path = new StringBuilder(node.getNodeName());
    // Climb towards the root, prepending each ancestor's name.
    for (Node ancestor = node.getParentNode();
            ancestor != null;
            ancestor = ancestor.getParentNode()) {
        path.insert(0, ancestor.getNodeName() + "/");
    }
    return path.toString();
}

最终输出是:

#document/message/metadata/msg_id

#document/message/metadata/client_type

#document/message/individual/name

#document/message/individual/surname

答案 1 :(得分:1)

我还不能发表评论(声望不够),但之前提供的答案并未处理数组中的节点。

当被检查的子节点位于某种数组元素中时,以下代码可以确定节点的完整路径:
/**
 * Loads {@code src/main/resources/file.xml}, finds every element without
 * child elements, and writes each one's node path to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if parsing the file or evaluating the XPath query fails
 */
public static void main(String[] args) throws Exception {
    File source = new File("src/main/resources/file.xml");
    XPath evaluator = XPathFactory.newInstance().newXPath();
    // Selects the elements that contain no further elements.
    String childlessElements = "//*[not(*)]";

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    Document parsed = factory.newDocumentBuilder().parse(source);
    parsed.getDocumentElement().normalize();

    NodeList matches = (NodeList) evaluator
            .compile(childlessElements)
            .evaluate(parsed, XPathConstants.NODESET);
    for (int idx = 0; idx != matches.getLength(); ++idx) {
        Node match = matches.item(idx);
        System.out.println(getNodePath(match));
    }
}

/**
 * Builds the path to a node in the XML structure.
 *
 * <p>Each segment is {@code /name}; when the node has at least one sibling
 * element with the same name, the segment is {@code /name[i]} where {@code i}
 * is the zero-based position of the node among those same-named siblings.
 * The document node itself is not part of the path, unless it is the node
 * passed in, in which case the result is {@code "/" + node.getNodeName()}.
 *
 * <p>The method is static so that it can be called from a static
 * {@code main}; calls through an instance reference continue to compile.
 *
 * @param node Child {@link Node}
 * @return {@link String} representation of Path to XML Node.
 * @throws IllegalArgumentException if {@code node} is {@code null}
 */
public static String getNodePath(Node node) {
    if (node == null) {
        throw new IllegalArgumentException("Node cannot be null");
    }
    if (node.getNodeType() == Node.DOCUMENT_NODE) {
        return "/" + node.getNodeName();
    }

    StringBuilder pathBuilder = new StringBuilder();
    // Walk from the node itself up to (but excluding) the document node so
    // that the starting node is indexed by the same rule as its ancestors.
    for (Node current = node;
            current != null && current.getNodeType() != Node.DOCUMENT_NODE;
            current = current.getParentNode()) {
        Integer index = getIndexOfArrayNode(current);
        String segment = "/" + current.getNodeName();
        if (index != null) {
            segment += "[" + index + "]";
        }
        pathBuilder.insert(0, segment);
    }
    return pathBuilder.toString();
}

/**
 * Tests whether a node is part of an array of elements, i.e. whether it is
 * an element that has at least one sibling element with the same name.
 *
 * <p>Non-element siblings (whitespace text from formatted XML, comments,
 * ...) are skipped, and names are compared case-sensitively.
 *
 * @param node {@link Node}
 * @return True if part of an array. Otherwise false.
 */
private static boolean isArrayNode(Node node) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return false;
    }
    for (Node sibling = node.getPreviousSibling();
            sibling != null;
            sibling = sibling.getPreviousSibling()) {
        if (isSameNamedElement(sibling, node)) {
            return true;
        }
    }
    for (Node sibling = node.getNextSibling();
            sibling != null;
            sibling = sibling.getNextSibling()) {
        if (isSameNamedElement(sibling, node)) {
            return true;
        }
    }
    return false;
}

/**
 * Figures out the index of the array node: the number of preceding sibling
 * elements that carry the same name as {@code node}.
 *
 * @param node {@link Node}
 * @return Zero-based index of element in array. Returns null if not inside an array.
 */
private static Integer getIndexOfArrayNode(Node node) {
    if (!isArrayNode(node)) {
        return null;
    }
    int precedingSameName = 0;
    for (Node sibling = node.getPreviousSibling();
            sibling != null;
            sibling = sibling.getPreviousSibling()) {
        if (isSameNamedElement(sibling, node)) {
            precedingSameName++;
        }
    }
    return precedingSameName;
}

/** Returns true when {@code candidate} is an element with the same name as {@code reference}. */
private static boolean isSameNamedElement(Node candidate, Node reference) {
    return candidate.getNodeType() == Node.ELEMENT_NODE
            && candidate.getNodeName().equals(reference.getNodeName());
}

输出将是这样的(可能):

/messages[0]/message/metadata/msg_id
/messages[0]/message/metadata/client_type
/messages[0]/message/individual/name
/messages[1]/message/metadata/msg_id
/messages[1]/message/metadata/client_type
/messages[1]/message/individual/name
etc.