无法将StAX输入转换为DOM DocumentFragment

时间:2016-08-16 18:14:54

标签: java xml linux dom stax

我编写了一个Java测试程序,它使用StAX遍历一个简单的input.xml文件,把其中的元素转换为DOM DocumentFragment,然后输出到一个新文件(output.xml)。在较新版本的服务器上,我的代码在调用transform(转换)函数时会抛出NullPointerException,我不明白原因。

错误如下:

java.lang.NullPointerException
        at net.sf.saxon.dom.DOMWriter.characters(DOMWriter.java:218)
        at net.sf.saxon.event.TreeReceiver.characters(TreeReceiver.java:277)
        at net.sf.saxon.pull.PullPushTee.copyEvent(PullPushTee.java:117)
        at net.sf.saxon.pull.PullPushTee.next(PullPushTee.java:72)
        at net.sf.saxon.pull.PullConsumer.consume(PullConsumer.java:42)
        at net.sf.saxon.pull.PullPushCopier.copy(PullPushCopier.java:44)
        at net.sf.saxon.event.Sender.sendPullSource(Sender.java:542)
        at net.sf.saxon.event.Sender.send(Sender.java:204)
        at net.sf.saxon.jaxp.IdentityTransformer.transform(IdentityTransformer.java:366)
        at XMLTest.getNextElement(XMLTest.java:66)
        at XMLTest.main(XMLTest.java:45)

我的测试程序的代码(可以编译)如下(75行):

import java.io.*;
import java.nio.file.*;
import javax.xml.parsers.*;
import javax.xml.stream.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stax.StAXSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.*;

/**
 * Test program that walks {@code ./input.xml} with StAX, copies each {@code <doc>}
 * element into a DOM {@link DocumentFragment}, and serializes the fragments into
 * {@code ./output.xml} inside a {@code <documents>} root element.
 */
public class XMLTest {

    // Shared state: main() initializes these and getNextElement() reads them.
    static XMLStreamWriter writer;
    static XMLStreamReader reader;
    static Document doc;
    static Transformer transformer;

    /**
     * Reads up to three {@code <doc>} elements from {@code ./input.xml} and writes
     * them to {@code ./output.xml}. Any failure is reported on standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("XML Test");
        System.out.println("========");

        // try-with-resources closes both files even when a transform fails.
        try (InputStream input = Files.newInputStream(Paths.get("./input.xml"));
             BufferedWriter output = Files.newBufferedWriter(Paths.get("./output.xml"))) {

            // Hand the raw byte stream to the parser so it can detect the encoding
            // from the XML declaration instead of assuming the platform charset.
            reader = XMLInputFactory.newInstance().createXMLStreamReader(input);

            // Files.newBufferedWriter encodes as UTF-8, which matches an XML
            // declaration that names no encoding.
            writer = XMLOutputFactory.newInstance().createXMLStreamWriter(output);
            writer.writeStartDocument();
            writer.writeStartElement("documents");
            writer.writeCharacters("\n");
            // The transformer below writes to the same underlying Writer, so push
            // everything the stream writer has buffered out first to keep the order.
            writer.flush();

            // Create the owner document for the fragments and an identity transformer.
            // NOTE(review): the reported stack trace contains net.sf.saxon classes, so on
            // the failing machine TransformerFactory.newInstance() apparently resolved to
            // Saxon rather than the JDK's built-in implementation - confirm the classpath.
            doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

            // Loop through input.xml.
            for (int i = 0; i < 3; i++) {
                DocumentFragment docElement = getNextElement("doc");
                if (docElement == null) {
                    // getNextElement already reported the failure; stop instead of
                    // transforming a null node.
                    break;
                }

                // StreamResult is used because a StAXResult would ignore the
                // OMIT_XML_DECLARATION output property.
                transformer.transform(new DOMSource(docElement), new StreamResult(output));
                System.out.println(docElement);
            }

            // Terminate the <documents> root so output.xml is well-formed.
            writer.writeEndElement();
            writer.writeEndDocument();
            writer.close();
            reader.close();
        }
        catch (Exception e) { e.printStackTrace(); }
    }

    /**
     * Advances the shared reader to the next start tag named {@code elementTag} and
     * copies that element into a new fragment, e.g. {@code getNextElement("doc")}
     * yields a fragment holding {@code <doc>...</doc>}.
     *
     * @param elementTag local name of the element to look for
     * @return the fragment containing the element; an empty fragment when the input
     *         holds no further matching element; {@code null} when reading or
     *         transforming failed (the exception is printed to standard error)
     */
    static DocumentFragment getNextElement(String elementTag) {
        try {
            DocumentFragment frag = doc.createDocumentFragment();
            while (reader.hasNext()) {
                boolean atWantedStartTag = reader.getEventType() == XMLStreamReader.START_ELEMENT
                        && reader.getLocalName().equals(elementTag);
                if (atWantedStartTag) {
                    System.out.println("Found: " + elementTag);
                    // The transformer consumes the reader from this start tag onward.
                    transformer.transform(new StAXSource(reader), new DOMResult(frag));
                    return frag;
                }
                reader.next();
            }
            System.out.println("Returned empty fragment");
            return frag;
        }
        catch (Exception e) { e.printStackTrace(); }
        return null;
    }

}

我的样本input.xml文件如下:

<?xml version="1.0" ?>
<add>
<doc>
    <field name="UID">0000001</field>
    <field name="company">New York Corp</field>
    <field name="datetime">2000-01-01T07:00:00Z</field>
    <field name="title">Research Update</field>
    <field name="url">www.green.com</field>
    <field name="notice">example notice example notice</field>
</doc>
<doc>
    <field name="UID">0000002</field>
    <field name="company">London Limited</field>
    <field name="datetime">2001-01-01T07:00:00Z</field>
    <field name="title">Warning</field>
    <field name="url">www.purple.com</field>
    <field name="notice">test 123</field>
</doc>
<doc>
    <field name="UID">0000003</field>
    <field name="company">Tokyo PLC</field>
    <field name="datetime">2004-01-01T07:00:00Z</field>
    <field name="title">Results</field>
    <field name="url">www.red.com</field>
    <field name="notice">These reults</field>
</doc>
</add>

顺便说一句,不同机器上的结果并不相同。在Debian Wheezy(7.8)上,使用java版本“1.8.0_45”,结果符合预期。也就是说,输入被逐块读取并写入输出文件。

在Debian Jessie(8.5)上,使用java版本“1.8.0_101”,每次迭代执行转换(transform)时我都会得到上面的NullPointerException。即使我在同一台机器上用Java 1.8.0_45编译并运行,问题依然存在。从错误类型来看,这更像是Java本身的问题,而不是与操作系统相关的问题。但我的调试尝试毫无进展。

1 个答案:

答案 0 :(得分:0)

这是我使用XPath和VTD-XML进行拆分的代码......它比你提供的代码更简单直观....

import com.ximpleware.*;
import java.io.*;
/**
 * Splits {@code d:\xml\input.xml} into one file per element matched by the XPath
 * {@code /add/doc}, named {@code d:\xml\output1.xml}, {@code output2.xml}, and so on,
 * using VTD-XML.
 */
public class splitXML {
    /**
     * Parses the input file and writes every matched element to its own output file.
     *
     * @param args unused
     * @throws VTDException if VTD-XML reports an error
     * @throws IOException if an output file cannot be written
     */
    public static void main(String[] args) throws VTDException, IOException {
        VTDGen vg = new VTDGen();
        if (!vg.parseFile("d:\\xml\\input.xml", false)){
            System.out.println("error");
            return;
        }
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        ap.selectXPath("/add/doc");
        int n = 0;
        // The loop ends when evalXPath() returns -1.
        while (ap.evalXPath() != -1) {
            // Packed long: the low 32 bits are used as the offset and the high 32 bits
            // as the length of the current element within the document bytes.
            long l = vn.getElementFragment();
            // try-with-resources closes the file even when write() throws.
            try (FileOutputStream fos = new FileOutputStream("d:\\xml\\output" + (++n) + ".xml")) {
                fos.write(vn.getXML().getBytes(), (int) l, (int) (l >> 32));
            }
        }
    }
}