如何使用java将XML文件拆分为多个XML文件

时间:2015-03-20 12:08:53

标签: java xml

我第一次在Java中使用XML文件,需要一些帮助。我正在尝试使用Java将下面的XML文件拆分为多个XML文件:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<products>
    <product>
        <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
        <gtin>00027242816657</gtin>
        <price>2999.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple iPad 2 with Wi-Fi 16GB - iOS 5 - Black
        </description>
        <gtin>00885909464517</gtin>
        <price>399.0</price>
        <orderId>2343</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue
        </description>
        <gtin>00027242831438</gtin>
        <price>91.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple MacBook Air A 11.6" Mac OS X v10.7 Lion MacBook
        </description>
        <gtin>00885909464043</gtin>
        <price>1149.0</price>
        <orderId>2344</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Panasonic TC-L47E50 47" Smart TV Viera E50 Series LED
            HDTV</description>
        <gtin>00885170076471</gtin>
        <price>999.99</price>
        <orderId>2344</orderId>
        <supplier>Panasonic</supplier>
    </product>
</products>

我试图获得三个XML文档,如:

 <?xml version="1.0" encoding="UTF-8"?>
<products>
        <product>
            <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
            <gtin>00027242816657</gtin>
            <price currency="USD">2999.99</price>
            <orderid>2343</orderid>
        </product>
        <product>
            <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue</description>
            <gtin>00027242831438</gtin>
            <price currency="USD">91.99</price>
            <orderid>2343</orderid>
        </product>
</products>

每个供应商对应一个文件。我该如何实现?非常感谢任何帮助。

7 个答案:

答案 0 :(得分:1)

确保将“inputFile”中的路径更改为您的文件以及输出部分:

StreamResult result = new StreamResult(new File("C:\xmls\" + supplier.trim() + ".xml"));

这是您的代码。

import java.io.File;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Splits <code>resources/products.xml</code> into one XML file per supplier.
 *
 * Every <code>/products/product</code> element is copied into a new document
 * (rooted at <code>&lt;products&gt;</code>) for each distinct supplier it names,
 * and that document is written to <code>resources/&lt;supplier&gt;.xml</code>.
 */
public class ExtractXml
{
    /** Location of the document that gets split. */
    private static final String INPUT_FILE = "resources/products.xml";

    /** Prefix of the generated per-supplier files. */
    private static final String OUTPUT_PREFIX = "resources/";

    /**
     * Parses the input file, groups its products by supplier and writes one
     * indented XML file per supplier.
     *
     * @param args ignored
     * @throws Exception if the input cannot be parsed or an output file cannot be written
     */
    public static void main(String[] args) throws Exception
    {
        DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = dBuilder.parse(new File(INPUT_FILE));

        Map<String, List<Node>> productsBySupplier = groupProductsBySupplier(doc);

        // A Transformer can be reused for consecutive transformations, so it is
        // created once instead of once per supplier.
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");

        for (Map.Entry<String, List<Node>> entry : productsBySupplier.entrySet())
        {
            String supplier = entry.getKey();
            List<Node> products = entry.getValue();

            System.out.println("Found " + products.size() +
                               " product(s) for supplier " + supplier);

            Document suppXml = buildSupplierDocument(dBuilder, products);

            // The new XML file is stored as supplierName.xml, e.g. Sony.xml.
            // The (trimmed) supplier name is used verbatim as the file name.
            StreamResult result = new StreamResult(new File(OUTPUT_PREFIX + supplier + ".xml"));
            transformer.transform(new DOMSource(suppXml), result);

            System.out.println("Done for " + supplier);
        }
    }

    /**
     * Groups the top-level products by the trimmed text of their supplier children.
     *
     * Each supplier appears exactly once in the result, so every output file is
     * written once. The comparison happens in Java rather than inside a
     * string-built XPath predicate, so names containing quotes are handled.
     *
     * @param doc the parsed input document
     * @return suppliers in first-seen order, each mapped to its products in document order
     * @throws XPathExpressionException if an XPath expression cannot be evaluated
     */
    private static Map<String, List<Node>> groupProductsBySupplier(Document doc)
            throws XPathExpressionException
    {
        XPath xpath = XPathFactory.newInstance().newXPath();
        XPathExpression allProducts = xpath.compile("/products/product");
        XPathExpression suppliersOfProduct = xpath.compile("supplier");

        // LinkedHashMap keeps the suppliers in the order they are first met.
        Map<String, List<Node>> grouped = new LinkedHashMap<String, List<Node>>();

        NodeList productNodes = (NodeList) allProducts.evaluate(doc, XPathConstants.NODESET);
        for (int i = 0; i < productNodes.getLength(); ++i)
        {
            Node product = productNodes.item(i);
            NodeList supplierNodes =
                (NodeList) suppliersOfProduct.evaluate(product, XPathConstants.NODESET);

            for (int j = 0; j < supplierNodes.getLength(); ++j)
            {
                String supplier = supplierNodes.item(j).getTextContent().trim();
                if (supplier.isEmpty())
                {
                    // A blank supplier would otherwise produce a file named ".xml".
                    continue;
                }
                System.out.println(supplier);

                List<Node> bucket = grouped.get(supplier);
                if (bucket == null)
                {
                    bucket = new ArrayList<Node>();
                    grouped.put(supplier, bucket);
                }
                // A product that repeats the same supplier is still added only once;
                // products are visited in order, so a repeat can only be the last entry.
                if (bucket.isEmpty() || bucket.get(bucket.size() - 1) != product)
                {
                    bucket.add(product);
                }
            }
        }
        return grouped;
    }

    /**
     * Builds a new document with a <code>&lt;products&gt;</code> root holding deep
     * copies of the given product nodes; the source document is left untouched.
     *
     * @param builder  builder used to create the empty document
     * @param products product nodes to copy
     * @return the new document
     */
    private static Document buildSupplierDocument(DocumentBuilder builder, List<Node> products)
    {
        Document suppXml = builder.newDocument();

        // The root node <products> has to be recreated in the new document.
        Element root = suppXml.createElement("products");
        suppXml.appendChild(root);

        for (Node product : products)
        {
            // importNode makes a deep copy that is already owned by suppXml.
            root.appendChild(suppXml.importNode(product, true));
        }
        return suppXml;
    }

}

答案 1 :(得分:0)

您可以在这里查看如何在Java中解析XML文档: DOM XML Parser Example

这里,如何编写新的XML文件: Create XML file using java

此外,您可以学习XPath以轻松选择节点:Java Xpath expression

如果性能不是您的首要目标,那么在加载好DOM并准备好XPath之后,首先可以使用以下XPath查询检索xml文档中的所有供应商:

//supplier/text()

你会得到类似的东西:

Text='Sony'
Text='Apple'
Text='Sony'
Text='Apple'
Text='Panasonic'

然后我会将这些结果放在ArrayList或其他集合中(结果中同一供应商会出现多次,建议先去重)。第二步是遍历该集合,并针对每个供应商查询XML输入文档,以便提取该供应商的所有节点:

/products/product[supplier='Sony'] 

当然在java中你必须以动态的方式构建最后的xpath查询:

String xpathQuery = "/products/product/[supplier='" + currentValue + "']

之后,您将获得与您指定的供应商匹配的节点列表。下一步是构建新的XML DOM并将其保存在文件中。

答案 2 :(得分:0)

DOM解析器将消耗更多内存。我更喜欢使用SAX解析器来读取XML并写入。

答案 3 :(得分:0)

我喜欢Xmappr(https://code.google.com/p/xmappr/)的方法,你可以使用简单的注释:

首先是根元素类Products,它只负责保存Product实例的列表:

// Mapping class for the document root: it only holds the list of Product entries.
// NOTE(review): @RootElement/@Element are presumably the Xmappr annotations this
// answer refers to; confirm that element names default to the class/field names.
@RootElement
public class Products {

    // One entry per <product> child; the field name matches the tag name used in
    // the sample XML.
    @Element
    public List<Product> product;
}

然后是产品类

// Mapping class for a single <product> entry. Each public field carries the same
// name as one child tag of <product> in the sample input, and every value is kept
// as a plain String (no numeric conversion is done here).
// NOTE(review): presumably mapped by Xmappr via the annotations below; confirm.
@RootElement
public class Product {

   // Text of <description>.
   @Element
   public String description;

   // Text of <supplier>; the value to group by when splitting per supplier.
   @Element
   public String supplier;

   // Text of <gtin>.
   @Element
   public String gtin;

   // Text of <price>, kept as text.
   @Element
   public String price;

   // Text of <orderId>.
   @Element
   public String orderId;
}

然后您只需从产品中获取产品实例:

/**
 * Reads {@code test.xml}, maps it to a {@code Products} instance and prints the
 * description of every product.
 *
 * @param args ignored
 * @throws FileNotFoundException if {@code test.xml} cannot be opened
 */
public static void main(String[] args) throws FileNotFoundException {
    Reader reader = new FileReader("test.xml");
    try {
        Xmappr xm = new Xmappr(Products.class);
        Products products = (Products) xm.fromXML(reader);

        // fetch list of products
        List<Product> listOfProducts = products.product;

        // NOTE(review): the list is presumably left null when the document has no
        // <product> element; the guard keeps that case from failing. Confirm.
        if (listOfProducts != null) {
            // do something with the products in the list
            for (Product product : listOfProducts) {
                System.out.println(product.description);
            }
        }
    } finally {
        // The reader was previously never closed, leaking the file handle.
        try {
            reader.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done if closing a fully-read input fails.
        }
    }
}

然后你可以随意对产品做任何事情(例如根据供应商对它们进行分类并将它们放到xml文件中)

答案 4 :(得分:0)

考虑此xml

<?xml version="1.0"?>
<SSNExportDocument xmlns="urn:com:ssn:schema:export:SSNExportFormat.xsd" Version="0.1" DocumentID="b482350d-62bb-41be-b792-8a9fe3884601-1" ExportID="b482350d-62bb-41be-b792-8a9fe3884601" JobID="464" RunID="3532468" CreationTime="2019-04-16T02:20:01.332-04:00" StartTime="2019-04-15T20:20:00.000-04:00" EndTime="2019-04-16T02:20:00.000-04:00">
    <MeterData MeterName="MUNI1-11459398" UtilDeviceID="11459398" MacID="00:12:01:fae:fe:00:d5:fc">
        <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
            <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:06.214-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                <Tier Number="0">
                    <Register Number="1" Summation="5949.1000" SummationUOM="GAL"/>
                </Tier>
            </RegisterRead>
        </RegisterData>
    </MeterData>
    <MeterData MeterName="MUNI4-11460365" UtilDeviceID="11460365" MacID="00:11:01:bc:fe:00:d3:f9">
        <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
            <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:11.082-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                <Tier Number="0">
                    <Register Number="1" Summation="136349.9000" SummationUOM="GAL"/>
                </Tier>
            </RegisterRead>
        </RegisterData>
    </MeterData>
</SSNExportDocument>

我们可以使用JAXB将xml标记转换为对象,然后就可以对这些对象进行任意处理。

// Unmarshal input.xml into the JAXB-bound SSNExportDocument object.
// NOTE(review): jaxbContext is assigned but not declared in this snippet;
// presumably a JAXBContext variable declared elsewhere. Confirm.
File xmlFile = new File("input.xml");
jaxbContext = JAXBContext.newInstance(SSNExportDocument.class);
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
SSNExportDocument ssnExpDoc = (SSNExportDocument) jaxbUnmarshaller.unmarshal(xmlFile);

// Groups the meters by the part of MeterName that precedes the first '-'
// (e.g. "MUNI1" for "MUNI1-11459398").
Map<String, List<MeterData>> meterMapper = new HashMap<String, List<MeterData>>();

for (MeterData mData : ssnExpDoc.getMeterData()) {
    String meterFullName = mData.getMeterName();

    // indexOf/substring yields the same prefix as split("-")[0], but does not
    // throw for a name that consists only of dashes (split returns an empty array).
    int dashIndex = meterFullName.indexOf('-');
    String groupKey = (dashIndex < 0) ? meterFullName : meterFullName.substring(0, dashIndex);

    List<MeterData> group = meterMapper.get(groupKey);
    if (group == null) {
        // First meter seen for this prefix: register a fresh list.
        group = new ArrayList<>();
        meterMapper.put(groupKey, group);
    }
    group.add(mData);
}

meterMapper 把 MeterName 中“-”之前的前缀(例如 MUNI1)映射到对应的 MeterData 对象列表。

然后将内容编组为

       // Marshals an object back to XML text and prints it to the console.
       // The context is bound to SSNExportDocument.
       JAXBContext jaxbContext = JAXBContext.newInstance(SSNExportDocument.class);

        // Create the marshaller from that context.
        Marshaller jaxbMarshaller = jaxbContext.createMarshaller();

        // JAXB_FORMATTED_OUTPUT asks for indented, line-broken output.
        // JAXB_FRAGMENT marks the output as a fragment; per the JAXB documentation
        // this suppresses the XML declaration. Confirm that this is wanted here.
        jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
        jaxbMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE);
        //jaxbMarshaller.setProperty("com.sun.xml.bind.xmlDeclaration", Boolean.FALSE);

        // The XML is collected in memory first so that it can be printed below.

        StringWriter sw = new StringWriter();

        // Write the XML to the StringWriter.
        // NOTE(review): "employee" is not defined anywhere in this snippet; presumably
        // the object to marshal (e.g. an SSNExportDocument built from one meterMapper
        // group) is meant. Confirm and replace.
        jaxbMarshaller.marshal(employee, sw);

        // Print the generated XML for verification.
        String xmlContent = sw.toString();
        System.out.println(xmlContent);

答案 5 :(得分:0)

不是完美的解决方案,但在大多数情况下都有效。不得不玩一些字符串操作才能使它工作。基本上,此解决方案将给定元素的给定 XML 拆分并形成子 XML 并将其写入列表。

/**
 * Reads {@code input.xml} with the StAX event API and, for every element whose
 * local name equals {@code elementSplitString} ("product"), assembles a text
 * fragment from the document "header" captured before the first such element
 * plus the serialized events of that element. Each fragment is appended to a
 * local list.
 *
 * The list is neither returned, printed nor written anywhere in this method, so
 * as written the fragments are discarded when the method ends.
 *
 * Any Throwable raised while processing is caught and its stack trace printed.
 *
 * @param args not used
 */
public static void main(String[] args) {
    java.io.File inputFile = new java.io.File("input.xml");
    // Local name of the element at which the document is split.
    String elementSplitString = "product";
    java.io.InputStream inputStream = null;

    try {
        

        inputStream = new java.io.BufferedInputStream(new java.io.FileInputStream(inputFile));

        javax.xml.stream.XMLInputFactory inputFactory = javax.xml.stream.XMLInputFactory.newInstance();
        javax.xml.stream.XMLOutputFactory outputFactory = javax.xml.stream.XMLOutputFactory.newInstance();
        javax.xml.stream.XMLEventReader reader = inputFactory.createXMLEventReader(inputStream);
        // Writer for the split element currently being copied; null while outside one.
        javax.xml.stream.XMLEventWriter writer = null;
        // Receives every event seen before the first split element (the "header").
        StringWriter parentXMLStringWriter = new StringWriter();
        javax.xml.stream.XMLEventWriter headerWriter = outputFactory.createXMLEventWriter(parentXMLStringWriter); 
        // Backing buffer of the current split element's writer.
        StringWriter stringWriter = null;
        // Local name of the last start/end element seen before the first split element.
        String lastReadEvent = "";
        // Set once the first split element starts; never reset to false afterwards.
        boolean splitElementFound = false;
        // Collected fragments, one per completed split element.
        List<StringBuilder> list = new ArrayList<StringBuilder>();
        while (reader.hasNext()) {
            javax.xml.stream.events.XMLEvent event = reader.nextEvent();
            
            
            switch(event.getEventType()) {
                case javax.xml.stream.XMLStreamConstants.START_ELEMENT:
                    javax.xml.stream.events.StartElement startElement = (javax.xml.stream.events.StartElement)event;
                    if (startElement.getName().getLocalPart().equals(elementSplitString)) {
                        // A split element begins: start a fresh buffer and writer.
                        // This happens unconditionally, so a split element nested inside
                        // another one replaces the writer of the outer element.
                        splitElementFound = true;
                        stringWriter = new StringWriter();
                        writer = outputFactory.createXMLEventWriter(stringWriter);
                        if (writer != null) writer.add(event);
                    } else if(writer != null)
                         // Inside a split element: copy the nested start tag.
                         writer.add(event);
                    
                    break;

                case javax.xml.stream.XMLStreamConstants.END_ELEMENT:
                    javax.xml.stream.events.EndElement endElement = (javax.xml.stream.events.EndElement)event;
                    if (endElement.getName().getLocalPart().equals(elementSplitString)) {
                        if (writer != null) writer.add(event);
                        
                        // NOTE(review): unlike the line above, this close() is not
                        // guarded against writer being null.
                        writer.close();
                        StringBuilder builder = new StringBuilder();
                        String parentXML = parentXMLStringWriter.toString();
                        // Header text up to and including the first '>' that follows the
                        // first occurrence of lastReadEvent in the header text.
                        builder.append(parentXML.subSequence(0, parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 1));
                        // The serialized split element itself.
                        builder.append(stringWriter.toString());
                        // Remainder of the header text, starting two characters past that
                        // '>' (one character is skipped).
                        // NOTE(review): presumably the skipped character is a line break,
                        // and whether any closing tags exist in the header text depends on
                        // what headerWriter.close() emits. Verify with the StAX
                        // implementation in use.
                        builder.append(parentXML.substring(parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 2));
                        list.add(builder);
                        writer = null;
                    }else if(writer != null)
                        // Inside a split element: copy the nested end tag.
                        writer.add(event);
                    break;

                default:
                    // Any other event (text, comments, ...) is copied only while inside
                    // a split element.
                    if (writer != null) 
                        writer.add(event);
                    break;
            }
            if(!splitElementFound) {
                // Still in the header: remember the element name and copy the event.
                if(event instanceof javax.xml.stream.events.StartElement)
                    lastReadEvent = ((javax.xml.stream.events.StartElement)event).getName().getLocalPart();
                else if(event instanceof javax.xml.stream.events.EndElement)
                    lastReadEvent = ((javax.xml.stream.events.EndElement)event).getName().getLocalPart();
                headerWriter.add(event);
            }else {
                // Runs for every event after the first split element, so close() is
                // called repeatedly on the same writer.
                headerWriter.close();
            }

        }
        
        headerWriter = null;
        reader.close();
        if (writer != null) writer.close();
    } catch(Throwable ex) {
        // Catches everything, including Errors, and only prints the stack trace.
        ex.printStackTrace();
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (java.io.IOException ex) {
                // do nothing
            }
        }
    }
} 

答案 6 :(得分:-1)

如果你有XML方言的Schema(XSD),那么Dom的替代方案就是JAXB。