使用java删除XML中的重复项的小问题

时间:2013-04-23 10:30:30

标签: java xml dom xpath xml-parsing

这是示例XML:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
        <check>
        <val>
            <Samsung>
               <name value="galaxy" />
               <name value="galaxy" />
               <name value="galaxys" />
                <id value="123" />
                <id value="123" />
                <cal>23</cal>
                <cal>23</cal>
                <name2 value="galaxy" />
           </Samsung>
            <htc>
               <name value="galaxy" />
               <name value="galaxy" />
                <name value="galaxys" />
              <id value="123" />
              <id value="123" />
             <name2 value="galaxy" />
            </htc>
       </val>
      </check>

这是我编写的java代码,它找到重复的元素(包括它们的属性)并删除它们:

    import java.io.File;
     import java.io.IOException;

  import javax.xml.parsers.DocumentBuilder;
      import javax.xml.parsers.DocumentBuilderFactory;
     import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.transform.OutputKeys;
        import javax.xml.transform.Transformer;
       import javax.xml.transform.TransformerException;
       import javax.xml.transform.TransformerFactory;
       import javax.xml.transform.dom.DOMSource;
      import javax.xml.transform.stream.StreamResult;
      import javax.xml.xpath.XPathConstants;
    import javax.xml.xpath.XPathExpression;
      import javax.xml.xpath.XPathExpressionException;
      import javax.xml.xpath.XPathFactory;

   import org.w3c.dom.Document;
      import org.w3c.dom.Element;
  import org.w3c.dom.Node;
      import org.w3c.dom.NodeList;
      import org.xml.sax.SAXException;
public class RecursiveNodeCheck {

/**
 * @param args
 */
public static String parent;
public static void main(String[] args) {
    // TODO Auto-generated method stub
    String path="D:/vodafone/parse.xml";

    try {

        File file = new File(path);

        DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder();

        Document doc = dBuilder.parse(file);

        System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
        parent=doc.getDocumentElement().getNodeName();
        String name="//"+parent+"/*";
        XPathExpression expr = XPathFactory.newInstance().newXPath()
        .compile(name);

        NodeList list = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);


        recursive(doc,list,path);


    } catch (Exception e) {
        System.out.println(e.getMessage());
    }



}

private static void recursive(Document doc,NodeList list,String path) throws XPathExpressionException, TransformerException, SAXException, ParserConfigurationException, IOException {
    // TODO Auto-generated method stub
    for (int count = 0; count < list.getLength(); count++) {
        Node tempNode = list.item(count);
        if(!(tempNode.getNodeName().equalsIgnoreCase(parent) )){
            if(!tempNode.hasChildNodes()&& tempNode.getNodeType() == Node.ELEMENT_NODE){
                Node head=  tempNode.getParentNode();

                Node current=head.getFirstChild();

                String exp=null;
                while(current!=null ){
                    if (current.getNodeType() != Node.ELEMENT_NODE ){
                        current=current.getNextSibling();
                    }else{
                        if( current.hasAttributes()  ){
                            String key = current.getAttributes().getNamedItem("value").getNodeValue();
                            String value="";
                            if(current.getNodeValue()!=null){
                                value = current.getNodeValue();
                            }
                            exp= "//"+head.getNodeName()+"/"+current.getNodeName()+"[@value='"+key+"']"+value;

                            DelElements(doc,exp,path);
                            current = current.getNextSibling();
                        }
                        else{

                            exp= "//"+head.getNodeName()+"/"+current.getNodeName()+"/text()";
                            DelElements(doc,exp,path);
                            current = current.getNextSibling();
                        }
                    }





                }

            }else{
                recursive(doc,tempNode.getChildNodes(),path);   
            }}

        if (tempNode.hasChildNodes()) {

            // loop again if has child nodes
            recursive(doc,tempNode.getChildNodes(),path);   

        }
        System.out.println("Node Name =" + tempNode.getNodeName() + " [CLOSE]");



    }




}

private static void DelElements(Document doc, String exp,String path) throws TransformerException, SAXException, ParserConfigurationException, IOException {
    // TODO Auto-generated method stub

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);

    NodeList nodes = null;
    try {
         doc = factory.newDocumentBuilder().parse(new File(path));


        XPathExpression expr = XPathFactory.newInstance().newXPath()
        .compile(exp);

        nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
    } catch (XPathExpressionException e) {
        e.printStackTrace();
    }

     for ( int i= nodes.getLength()-1;i>0; i--) {
            System.out.println("."); //progress indicator
            if (nodes.item(i).getNodeType() == Node.TEXT_NODE ){
                 nodes.item(i).getParentNode().removeChild( nodes.item(i));

            }
            else{
                 Element el = (Element) ( nodes.item(i));
               el.getParentNode().removeChild(el);
            }


        }
    TransformerFactory transformerFactory = TransformerFactory.newInstance();

    Transformer transformer = transformerFactory.newTransformer();
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    DOMSource source = new DOMSource(doc);
    StreamResult result = new StreamResult(new File(path));
    transformer.transform(source, result);
    // TODO Auto-generated method stub

}





    }
     }

这是我得到的输出:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<check>
<val>
    <Samsung>
        <name value="galaxy"/>

        <name value="galaxys"/>
        <id value="123"/>

        <cal>23</cal>
        **<cal/>**
        <name2 value="galaxy"/>
    </Samsung>
    <htc>
        <name value="galaxy"/>

        <name value="galaxys"/>
        <id value="123"/>

        <name2 value="galaxy"/>
    </htc>
</val>
</check>

在上面的输出xml中,<cal/>标记仍然存在。我应该如何删除它?

1 个答案:

答案 0 :(得分:0)

更改XPATH表达式:  exp= "//"+head.getNodeName()+"/"+current.getNodeName()+"[text()='"+current.getTextContent()+"']";

这个给出正确的输出。