如何使用java从XML文件中正确解析子实体?

时间:2016-11-28 21:20:13

标签: java xml

我有以下XML文件,包含2个“Customer”类型的子实体:

<?xml version="1.0" encoding="utf-8"?>
<p:CustomerElement xmlns:p="http://www.dog.com/customer" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:schemaLocation="http://www.dog.com/customer Customer.xsd"> 
 <Customer>
    <Sender>
      <transmitDate>0016-00-01T00:00:00</transmitDate>
      <transmitter>Dog ETL v0.0</transmitter>
      <dealerCode><![CDATA[X000]]></dealerCode>
      <DMSSystem><![CDATA[WWW]]></DMSSystem>
      <DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
    </Sender>
    <Identifier>
      <updateInd><![CDATA[A]]></updateInd>
      <dealerCustNumber><![CDATA[HP]]></dealerCustNumber>
      <dealerCustName><![CDATA[COMPUTERS]]></dealerCustName>
      <phoneNumber><![CDATA[000 661 0000]]></phoneNumber>
      <faxNumber><![CDATA[000 000 0601]]></faxNumber>
      <email xsi:nil="true" />
      <customerType><![CDATA[R]]></customerType>
      <activeCustomerInd>false</activeCustomerInd>
      <parentCustomerNumber xsi:nil="true" />
      <primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
      <preferredLanguage><![CDATA[ENG]]></preferredLanguage>
      <dealerDateInSystem>0000-01-11T00:00:00</dealerDateInSystem>
      <dealerLastUpdatedDate>0015-00-05T00:00:00</dealerLastUpdatedDate>
    </Identifier>
    <LoDogion>
      <address0><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address0>
      <address3><![CDATA[AS PER JOHN DOE  FEB SAB/15]]></address3>
      <city><![CDATA[BAM]]></city>
      <postalCode><![CDATA[5S 15]]></postalCode>
      <state><![CDATA[AB]]></state>
      <country><![CDATA[CA]]></country>
      <loDogion><![CDATA[FLAGGED FOR DELETION]]></loDogion>
      <addressType><![CDATA[M]]></addressType>
    </LoDogion>
    <Division>
      <divisionCode><![CDATA[G]]></divisionCode>
      <divisionName><![CDATA[BOOM]]></divisionName>
      <IndustryCode>
        <industryCode><![CDATA[Q00]]></industryCode>
        <primaryIndustryCodeInd>true</primaryIndustryCodeInd>
      </IndustryCode>
      <SalesRep>
        <number><![CDATA[XXX]]></number>
        <name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
        <type><![CDATA[M]]></type>
        <par>0</par>
        <email xsi:nil="true" />
        <phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
      </SalesRep>
    </Division>
  </Customer>
 <Customer>
    <Sender>
      <transmitDate>0016-00-01T00:00:00</transmitDate>
      <transmitter>Dog ETL v0.0</transmitter>
      <dealerCode><![CDATA[000]]></dealerCode>
      <DMSSystem><![CDATA[WWW]]></DMSSystem>
      <DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
    </Sender>
    <Identifier>
      <updateInd><![CDATA[A]]></updateInd>
      <dealerCustNumber><![CDATA[HP]]></dealerCustNumber>
      <dealerCustName><![CDATA[COMPUTERS]]></dealerCustName>
      <phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
      <faxNumber><![CDATA[000 000 0000]]></faxNumber>
      <email xsi:nil="true" />
      <customerType><![CDATA[R]]></customerType>
      <activeCustomerInd>false</activeCustomerInd>
      <parentCustomerNumber xsi:nil="true" />
      <primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
      <preferredLanguage><![CDATA[ENG]]></preferredLanguage>
      <dealerDateInSystem>0000-01-11T00:00:00</dealerDateInSystem>
      <dealerLastUpdatedDate>0015-00-05T00:00:00</dealerLastUpdatedDate>
    </Identifier>
    <LoDogion>
      <address0><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address0>
      <address3><![CDATA[AS PER JOHN DOE  FEB ]]></address3>
      <city><![CDATA[BAM]]></city>
      <postalCode><![CDATA[Q5 15]]></postalCode>
      <state><![CDATA[AA]]></state>
      <country><![CDATA[AA]]></country>
      <loDogion><![CDATA[FLAGGED FOR DELETION]]></loDogion>
      <addressType><![CDATA[M]]></addressType>
    </LoDogion>
    <Division>
      <divisionCode><![CDATA[G]]></divisionCode>
      <divisionName><![CDATA[BOOM]]></divisionName>
      <IndustryCode>
        <industryCode><![CDATA[Q00]]></industryCode>
        <primaryIndustryCodeInd>true</primaryIndustryCodeInd>
      </IndustryCode>
      <SalesRep>
        <number><![CDATA[XXX]]></number>
        <name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
        <type><![CDATA[M]]></type>
        <par>0</par>
        <email xsi:nil="true" />
        <phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
      </SalesRep>
    </Division>
  </Customer>
</p:CustomerElement>

我有以下代码,它会转储此文件中的每个元素:

package com.dog.data;

import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;


/**
 * Parses an XML document into a namespace-aware DOM tree and prints every
 * element, together with its attributes, to standard output.
 *
 * <p>The document to read is taken from the first command-line argument; when
 * no argument is supplied, {@code /tmp/single_customer.xml} is used.
 */
public class ParseXmlFile {
    /** Location parsed when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT = "/tmp/single_customer.xml";

    /**
     * Parses the input document, prints its XML declaration properties
     * (version, encoding, standalone flag) and then dumps every element.
     *
     * <p>Parsing and configuration failures are reported on standard error
     * with a short prefix identifying the failure category; they are not
     * rethrown.
     *
     * @param args optional; {@code args[0]} is handed to
     *             {@link DocumentBuilder#parse(String)} as the document
     *             location. Defaults to {@code /tmp/single_customer.xml}.
     */
    public static void main(String[] args)
    {
        String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        try
        {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            // Namespace awareness is required for getLocalName(), getPrefix()
            // and getNamespaceURI() in dump() to return meaningful values.
            dbf.setNamespaceAware(true);
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document doc = db.parse(input);
            System.out.printf("Version = %s%n", doc.getXmlVersion());
            System.out.printf("Encoding = %s%n", doc.getXmlEncoding());
            System.out.printf("Standalone = %b%n%n", doc.getXmlStandalone());
            // Document-level children may also include comments and processing
            // instructions; only element nodes are dumped.
            NodeList nl = doc.getChildNodes();
            for (int i = 0; i < nl.getLength(); i++)
            {
                Node node = nl.item(i);
                if (node.getNodeType() == Node.ELEMENT_NODE)
                {
                    dump((Element) node);
                }
            }
        }
        catch (IOException ioe)
        {
            System.err.println("IOE: " + ioe);
        }
        catch (SAXException saxe)
        {
            System.err.println("SAXE: " + saxe);
        }
        catch (FactoryConfigurationError fce)
        {
            System.err.println("FCE: " + fce);
        }
        catch (ParserConfigurationException pce)
        {
            System.err.println("PCE: " + pce);
        }
    }

    /**
     * Prints one line describing the element (node name, local name, prefix,
     * namespace URI), one indented line per attribute, and then recurses into
     * every child element in document order.
     *
     * @param e the element to print; must not be {@code null}
     */
    static void dump(Element e)
    {
        System.out.printf("Element: %s, %s, %s, %s%n", e.getNodeName(),
                e.getLocalName(), e.getPrefix(),
                e.getNamespaceURI());
        NamedNodeMap nnm = e.getAttributes();
        if (nnm != null)
        {
            for (int i = 0; i < nnm.getLength(); i++)
            {
                // Each item of an element's attribute map is the Attr itself.
                // Looking it up again by qualified name is redundant and picks
                // the wrong node when two attributes share a qualified name
                // but live in different namespaces.
                Attr attr = (Attr) nnm.item(i);
                System.out.printf("  Attribute %s = %s%n", attr.getName(), attr.getValue());
            }
        }
        NodeList nl = e.getChildNodes();
        for (int i = 0; i < nl.getLength(); i++)
        {
            Node node = nl.item(i);
            if (node instanceof Element)
            {
                dump((Element) node);
            }
        }
    }
}

我的目标是从主文件中解析出每个“Customer”实体,并为每个这样的实体创建一个单独的XML文件。

有没有办法修改我当前的代码来完成这项任务?

1 个答案:

答案 0 :(得分:1)

使用恒等(identity)Transformer,将每个 Customer 节点直接转换为单独的文件:

// A Transformer created without a stylesheet copies its source to its result
// unchanged, which is all that is needed to serialize a DOM subtree.
Transformer identity = TransformerFactory.newInstance().newTransformer();

// Walk the direct children of the document element; each element child is
// written to its own numbered file (/tmp/customer1.xml, /tmp/customer2.xml, ...).
int written = 0;
for (Node child = doc.getDocumentElement().getFirstChild();
        child != null;
        child = child.getNextSibling()) {
    // Skip whitespace text, comments and other non-element nodes.
    if (child.getNodeType() != Node.ELEMENT_NODE) {
        continue;
    }
    written++;
    StreamResult target =
        new StreamResult(new File("/tmp/customer" + written + ".xml"));
    identity.transform(new DOMSource(child), target);
}