org.xml.sax.SAXParseException; publicId和systemId之间需要空格

时间:2017-10-26 00:10:55

标签: java xml jaxb sax

我正在Eclipse Oxygen中使用JavaFX编写一个Java桌面应用程序。我尝试读取一个返回XML文档的URL,但收到了如下错误:

org.xml.sax.SAXParseException; White spaces are required between publicId and systemId.

代码如下:

/**
 * Opens the given URL and parses its content into a namespace-aware DOM document.
 *
 * <p>If anything goes wrong (malformed URL, I/O failure, parse error, ...) the stack trace is
 * printed, an error dialog is shown and {@code null} is returned.
 *
 * @param url address of the XML document to load
 * @return the parsed document, or {@code null} if it could not be loaded or parsed
 */
private static Document loadTestDocument(String url) {
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        // try-with-resources releases the connection deterministically, including when
        // parsing fails, instead of relying on the parser to close the stream.
        try (java.io.InputStream in = new URL(url).openStream()) {
            return builder.parse(in);
        }
    } catch (Exception e) {
        e.printStackTrace();
        Alert alert = new Alert(AlertType.ERROR);
        alert.setTitle("Error");
        alert.setHeaderText("Could not load data");
        alert.setContentText("Could not load data from file:\n" + e.getMessage());

        alert.showAndWait();
        return null;
    }
}

XML文档非常基础,看起来像这样;

<?xml version="1.0" encoding="UTF-8"?><STOREITEMS><PRODUCT ITEM='R7876' NAME='REMOVED'>


<STOCK>In Stock</STOCK></PRODUCT>
<PRODUCT ITEM='BR122293' NAME='REMOVED'>


<STOCK>In Stock</STOCK></PRODUCT>
<PRODUCT ITEM='REMOVED' NAME='REMOVED'>


<STOCK>In Stock</STOCK></PRODUCT>
<PRODUCT ITEM='ASTRO' NAME='REMOVED'>


<STOCK>In Stock</STOCK></PRODUCT>
<PRODUCT ITEM='3002028110' NAME='REMOVED'>


<STOCK>In Stock</STOCK></PRODUCT>
<PRODUCT ITEM='0340' NAME='REMOVED'>


<STOCK>No Stock.</STOCK></PRODUCT>
<PRODUCT ITEM='570031' NAME='REMOVED'> ...

然后我使用JAXB的Unmarshaller对该文档进行转换,如下所示:

/**
 * Unmarshals a DOM document into a {@code StoreItems} object using JAXB.
 *
 * <p>Any exception is caught, its stack trace is printed and {@code null} is returned.
 *
 * @param doc the DOM document to unmarshal
 * @return the unmarshalled {@code StoreItems}, or {@code null} if an exception occurred
 */
private static StoreItems loadStoreItemsFromXMLDocument(Document doc) {
    try {
        // Build a JAXB context bound to the StoreItems class.
        JAXBContext context = JAXBContext
                .newInstance(StoreItems.class);
        Unmarshaller um = context.createUnmarshaller();
        // Unmarshal the DOM document and cast the result to StoreItems.
        StoreItems storeItems = (StoreItems) um.unmarshal(doc);
        ...
        return storeItems;

    } catch (Exception e) { // catches ANY exception
        e.printStackTrace();
        return null;
    }
}

这之前一直运行正常。然后,当我今天继续处理该项目时,就开始收到这个错误。文件发生了变化。

更新:这是我使用Fiddler在HexView中看到的内容:

HTTP/1.1 200 OK
Date: Thu, 26 Oct 2017 00:35:35 GMT
Server: nginx
Last-Modified: Wed, 25 Oct 2017 23:01:06 GMT
ETag: "8dbd7-55c6707d75e08"
Accept-Ranges: bytes
Content-Length: 580567
Cache-Control: max-age=2592000
Expires: Sat, 25 Nov 2017 00:35:35 GMT
Keep-Alive: timeout=1, max=100
Connection: Keep-Alive
Content-Type: application/xml

<?xml version="1.0" encoding="UTF-8"?><STOREITEMS><PRODUCT ITEM='R7876' NAME='Rimba Electro Stimulation Gloves Pair'>...

我认为下面是上述HexView内容的十六进制形式:

byte[] arrOutput = { 0x48, 0x54, 0x54, 0x50, 0x2F, 0x31, 0x2E, 0x31, 0x20, 0x32, 0x30, 0x30, 0x20, 0x4F, 0x4B, 0x0D, 0x0A, 0x44, 0x61, 0x74, 0x65, 0x3A, 0x20, 0x54, 0x68, 0x75, 0x2C, 0x20, 0x32, 0x36, 0x20, 0x4F, 0x63, 0x74, 0x20, 0x32, 0x30, 0x31, 0x37, 0x20, 0x30, 0x30, 0x3A, 0x33, 0x35, 0x3A, 0x33, 0x35, 0x20, 0x47, 0x4D, 0x54, 0x0D, 0x0A, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3A, 0x20, 0x6E, 0x67, 0x69, 0x6E, 0x78, 0x0D, 0x0A, 0x4C, 0x61, 0x73, 0x74, 0x2D, 0x4D, 0x6F, 0x64, 0x69, 0x66, 0x69, 0x65, 0x64, 0x3A, 0x20, 0x57, 0x65, 0x64, 0x2C, 0x20, 0x32, 0x35, 0x20, 0x4F, 0x63, 0x74, 0x20, 0x32, 0x30, 0x31, 0x37, 0x20, 0x32, 0x33, 0x3A, 0x30, 0x31, 0x3A, 0x30, 0x36, 0x20, 0x47, 0x4D, 0x54, 0x0D, 0x0A, 0x45, 0x54, 0x61, 0x67, 0x3A, 0x20, 0x22, 0x38, 0x64, 0x62, 0x64, 0x37, 0x2D, 0x35, 0x35, 0x63, 0x36, 0x37, 0x30, 0x37, 0x64, 0x37, 0x35, 0x65, 0x30, 0x38, 0x22, 0x0D, 0x0A, 0x41, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2D, 0x52, 0x61, 0x6E, 0x67, 0x65, 0x73, 0x3A, 0x20, 0x62, 0x79, 0x74, 0x65, 0x73, 0x0D, 0x0A, 0x43, 0x6F, 0x6E, 0x74, 0x65, 0x6E, 0x74, 0x2D, 0x4C, 0x65, 0x6E, 0x67, 0x74, 0x68, 0x3A, 0x20, 0x35, 0x38, 0x30, 0x35, 0x36, 0x37, 0x0D, 0x0A, 0x43, 0x61, 0x63, 0x68, 0x65, 0x2D, 0x43, 0x6F, 0x6E, 0x74, 0x72, 0x6F, 0x6C, 0x3A, 0x20, 0x6D, 0x61, 0x78, 0x2D, 0x61, 0x67, 0x65, 0x3D, 0x32, 0x35, 0x39, 0x32, 0x30, 0x30, 0x30, 0x0D, 0x0A, 0x45, 0x78, 0x70, 0x69, 0x72, 0x65, 0x73, 0x3A, 0x20, 0x53, 0x61, 0x74, 0x2C, 0x20, 0x32, 0x35, 0x20, 0x4E, 0x6F, 0x76, 0x20, 0x32, 0x30, 0x31, 0x37, 0x20, 0x30, 0x30, 0x3A, 0x33, 0x35, 0x3A, 0x33, 0x35, 0x20, 0x47, 0x4D, 0x54, 0x0D, 0x0A, 0x4B, 0x65, 0x65, 0x70, 0x2D, 0x41, 0x6C, 0x69, 0x76, 0x65, 0x3A, 0x20, 0x74, 0x69, 0x6D, 0x65, 0x6F, 0x75, 0x74, 0x3D, 0x31, 0x2C, 0x20, 0x6D, 0x61, 0x78, 0x3D, 0x31, 0x30, 0x30, 0x0D, 0x0A, 0x43, 0x6F, 0x6E, 0x6E, 0x65, 0x63, 0x74, 0x69, 0x6F, 0x6E, 0x3A, 0x20, 0x4B, 0x65, 0x65, 0x70, 0x2D, 0x41, 0x6C, 0x69, 0x76, 0x65, 0x0D, 0x0A, 0x43, 0x6F, 0x6E, 0x74, 0x65, 0x6E, 0x74, 0x2D, 0x54, 0x79, 0x70, 
0x65, 0x3A, 0x20, 0x61, 0x70, 0x70, 0x6C, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x2F, 0x78, 0x6D, 0x6C, 0x0D, 0x0A, 0x0D, 0x0A, 0xEF, 0xBB, 0xBF, 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x20, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x3D, 0x22, 0x55, 0x54, 0x46, 0x2D, 0x38, 0x22, 0x3F, 0x3E, 0x3C, 0x53, 0x54, 0x4F, 0x52, 0x45, 0x49, 0x54, 0x45, 0x4D, 0x53, 0x3E, 0x3C, 0x50, 0x52, 0x4F, 0x44, 0x55, 0x43, 0x54, 0x20, 0x49, 0x54, 0x45, 0x4D, 0x3D, 0x27, 0x52, 0x37, 0x38, 0x37, 0x36, 0x27, 0x20, 0x4E, 0x41, 0x4D, 0x45, 0x3D, 0x27, 0x52, 0x69, 0x6D, 0x62, 0x61, 0x20, 0x45, 0x6C, 0x65, 0x63, 0x74, 0x72, 0x6F, 0x20, 0x53, 0x74, 0x69, 0x6D, 0x75, 0x6C, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x20, 0x47, 0x6C, 0x6F, 0x76, 0x65, 0x73, 0x20, 0x50, 0x61, 0x69, 0x72, 0x27, 0x3E, 0x0A, 0x0A, 0x0A, 0x3C, 0x53, 0x54, 0x4F, 0x43, 0x4B, 0x3E, 0x49, 0x6E, 0x20, 0x53, 0x74, 0x6F, 0x63, 0x6B, 0x3C, 0x2F, 0x53, 0x54, 0x4F, 0x43, 0x4B, 0x3E, 0x3C, 0x2F, 0x50, 0x52, 0x4F, 0x44, 0x55, 0x43, 0x54, 0x3E, 0x0A };

1 个答案:

答案 0 :(得分:0)

您所看到的是Unicode“零宽不换行空格”(ZERO WIDTH NO-BREAK SPACE)字符,码点为U+FEFF。当它出现在文件开头时,它就是字节顺序标记(BOM),用于帮助接收方确定数据流是大端序(big-endian)还是小端序(little-endian)。

服务器将其编码为UTF-8,从而产生字节序列0xEF 0xBB 0xBF,而Fiddler“好心地”(但错误地)将其按ISO-8859-1解释。

如果服务器由您控制,请让它从一开始就不要发送BOM。此外,HTTP响应没有指定UTF-8编码,而它本应指定(例如 Content-Type: application/xml; charset=UTF-8)。