由BOM破坏的JAXB2 Mtom附件

时间:2011-01-13 10:28:56

标签: java jaxb spring-ws byte-order-mark mtom

我正在使用JAXB2在Spring-WS中执行OXM。我指定的XSD需要将大型XML文件附加到soap消息,因此我使用MTOM传输文件并在我的JAXB2Marshaller上启用了MTOM。

当JAXB2编组一个MIME类型为预期的text/xml的MTOM附件时,它会将该元素作为javax.xml.transform.Source对象传递。经过一番搜索,我找到了将Source对象写入文件的方法。

// Writes the attachment exposed by the request as a Source to a file on disk.
final Source source = request.getSource();
StreamSource streamSource = (StreamSource) source;
TransformerFactory factory = TransformerFactory.newInstance();
Transformer transformer = factory.newTransformer();
File file = new File ("/tempxmlfile.xml");
try {
    // Identity transform: copies the XML from the source into the target file.
    transformer.transform(streamSource, new StreamResult(file));
    LOG.info("File saved in "+file.getAbsolutePath());
} catch (Exception ex) {
    // The exception used to be discarded (ex.getMessage() with the result ignored),
    // which hid failures such as "Content is not allowed in prolog". Log it with
    // its stack trace so the failure is visible.
    LOG.error("Could not save attachment to " + file.getAbsolutePath(), ex);
}

我遇到的问题是,当我发送UTF-8编码文件作为附件时,我收到以下错误:

[Fatal Error] :1:1: Content is not allowed in prolog.
ERROR:  'Content is not allowed in prolog.'

这是由文件中编码文本前面的字节顺序标记(BOM)引起的。UTF-8编码的文件并不需要BOM,但Unicode标准允许它存在;然而Java不支持UTF-8编码流中的BOM。

我可以通过发送没有BOM的文件来解决这个问题,但这不太可行,因为它会导致插入BOM的大多数Microsoft产品出现问题。

针对Sun/Oracle拒绝修复这个与Stream有关的问题,已经有很多变通方法,但它们都要求您能够访问Stream;而JAXB2提供的Source对象没有InputStream,只有Reader对象。有没有办法解决这个问题:要么用一个知道如何忽略UTF-8编码BOM的Reader来包装Source的Reader对象,要么改变JAXB2将附件读入Source的方式,使其能够忽略UTF-8编码的BOM?

提前致谢, 克雷格

1 个答案:

答案 0 :(得分:3)

诀窍是对Reader进行“标记”(mark)。如果您的Reader不支持标记,可以将其包装在支持标记的BufferedReader中:

选项#1 - 检查BOM(字节顺序标记)并将其删除

我相信我的原始代码错误地写了BOM。下面的源代码更有意义:

import java.io.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

/**
 * Demonstrates removing a byte order mark (BOM) from a Reader-backed
 * StreamSource before handing it to a Transformer.
 *
 * The approach relies on mark/reset: the first few characters are read and,
 * if they are not a BOM, the reader is rewound so no content is lost.
 */
public class Demo {

    // A BOM that was decoded correctly (UTF-8 or UTF-16) shows up in a Reader
    // as the single character U+FEFF, not as its individual bytes.
    private static final char[] DECODED_BOM = {0xFEFF};

    // Raw BOM byte sequences. These only match when the Reader decoded the
    // bytes with a single-byte charset that maps each byte to the same code point.
    private static final char[] UTF32BE = {0x00, 0x00, 0xFE, 0xFF};
    private static final char[] UTF32LE = {0xFF, 0xFE, 0x00, 0x00};
    private static final char[] UTF16BE = {0xFE, 0xFF};
    private static final char[] UTF16LE = {0xFF, 0xFE};
    private static final char[] UTF8 = {0xEF, 0xBB, 0xBF};

    // Order matters: UTF32LE starts with the UTF16LE bytes, so the longer
    // pattern must be tried first.
    private static final char[][] KNOWN_BOMS = {
        DECODED_BOM, UTF32BE, UTF32LE, UTF16BE, UTF16LE, UTF8
    };

    /**
     * Writes a UTF-8 document with a BOM to bom.xml, reads it back through a
     * Reader, strips the BOM and copies the document to standard output.
     *
     * @param args ignored
     * @throws Exception if the file cannot be written or read, or the transform fails
     */
    public static void main(String[] args) throws Exception {
        // Create an XML document with a BOM. The BOM matches the UTF-8 content;
        // a UTF-16 BOM in front of UTF-8 text would be a malformed document.
        FileOutputStream fos = new FileOutputStream("bom.xml");
        try {
            writeBOM(fos, UTF8);
            OutputStreamWriter oswUTF8 = new OutputStreamWriter(fos, "UTF-8");
            oswUTF8.write("<root/>");
            oswUTF8.flush();
        } finally {
            fos.close();
        }

        // Create a Source based on a Reader to simulate source.getRequest().
        // The charset is explicit so the demo does not depend on the platform default.
        Reader fileReader = new InputStreamReader(new FileInputStream("bom.xml"), "UTF-8");
        try {
            StreamSource attachment = new StreamSource(fileReader);

            // Wrap reader in BufferedReader so it will support marking
            Reader reader = new BufferedReader(attachment.getReader());

            // Remove the BOM
            removeBOM(reader);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer();
            t.transform(new StreamSource(reader), new StreamResult(System.out));
        } finally {
            fileReader.close();
        }
    }

    /**
     * Writes the low byte of each element of bom to the stream.
     *
     * @param os  stream to write to
     * @param bom BOM bytes, one per char
     * @throws Exception if writing fails
     */
    private static void writeBOM(OutputStream os, char[] bom) throws Exception {
        for (int x = 0; x < bom.length; x++) {
            os.write((byte) bom[x]);
        }
    }

    /**
     * Consumes a leading BOM, if any, trying each known pattern in turn.
     * The reader is left untouched when no pattern matches.
     *
     * @param reader a reader that supports mark/reset
     * @throws Exception if reading, marking or resetting fails
     */
    private static void removeBOM(Reader reader) throws Exception {
        for (int i = 0; i < KNOWN_BOMS.length; i++) {
            if (removeBOM(reader, KNOWN_BOMS[i])) {
                return;
            }
        }
    }

    /**
     * Consumes the given BOM if the reader starts with it.
     *
     * @param reader a reader that supports mark/reset
     * @param bom    the character sequence to look for
     * @return true if the BOM was found and consumed; false if the reader was
     *         rewound to where it started
     * @throws Exception if reading, marking or resetting fails
     */
    private static boolean removeBOM(Reader reader, char[] bom) throws Exception {
        int bomLength = bom.length;
        reader.mark(bomLength);
        char[] possibleBOM = new char[bomLength];

        // A single read() may return fewer chars than requested, so keep
        // reading until the buffer is full or the stream ends.
        int filled = 0;
        while (filled < bomLength) {
            int count = reader.read(possibleBOM, filled, bomLength - filled);
            if (count < 0) {
                break;
            }
            filled += count;
        }

        boolean matches = filled == bomLength;
        for (int x = 0; matches && x < bomLength; x++) {
            matches = bom[x] == possibleBOM[x];
        }
        if (!matches) {
            reader.reset();
        }
        return matches;
    }

}

选项#2 - 查找'<'并将Reader推进到该位置

利用mark/reset一直读取,直到遇到'<':

import java.io.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

/**
 * Demonstrates skipping everything in front of the first '<' of a
 * Reader-backed StreamSource before handing it to a Transformer.
 *
 * This discards a BOM however it was decoded, but it also silently discards
 * any other characters that precede the first '<'.
 */
public class Demo2 {

    // UTF-8 BOM bytes, written in front of the demo document.
    private static final char[] UTF8 = {0xEF, 0xBB, 0xBF};

    /**
     * Writes a UTF-8 document with a BOM to bom.xml, reads it back through a
     * Reader, skips to the first '<' and copies the document to standard output.
     *
     * @param args ignored
     * @throws Exception if the file cannot be written or read, or the transform fails
     */
    public static void main(String[] args) throws Exception {
        // Create an XML document with a BOM. The BOM matches the UTF-8 content;
        // a UTF-16 BOM in front of UTF-8 text would be a malformed document.
        FileOutputStream fos = new FileOutputStream("bom.xml");
        try {
            writeBOM(fos, UTF8);
            OutputStreamWriter oswUTF8 = new OutputStreamWriter(fos, "UTF-8");
            oswUTF8.write("<root/>");
            oswUTF8.flush();
        } finally {
            fos.close();
        }

        // Create a Source based on a Reader to simulate source.getRequest().
        // The charset is explicit so the demo does not depend on the platform default.
        Reader fileReader = new InputStreamReader(new FileInputStream("bom.xml"), "UTF-8");
        try {
            StreamSource attachment = new StreamSource(fileReader);

            // Wrap reader in BufferedReader so it will support marking
            Reader reader = new BufferedReader(attachment.getReader());

            // Remove the BOM
            removeBOM(reader);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer();
            t.transform(new StreamSource(reader), new StreamResult(System.out));
        } finally {
            fileReader.close();
        }
    }

    /**
     * Writes the low byte of each element of bom to the stream.
     *
     * @param os  stream to write to
     * @param bom BOM bytes, one per char
     * @throws Exception if writing fails
     */
    private static void writeBOM(OutputStream os, char[] bom) throws Exception {
        for (int x = 0; x < bom.length; x++) {
            os.write((byte) bom[x]);
        }
    }

    /**
     * Advances the reader until the next character to be read is '<'.
     *
     * If the input contains no '<' at all, the reader is returned positioned
     * at end of input. The previous recursive version never checked for end
     * of input and recursed until the stack overflowed in that case.
     *
     * @param reader a reader that supports mark/reset
     * @return the same reader, positioned at the first '<' or at end of input
     * @throws Exception if reading, marking or resetting fails
     */
    private static Reader removeBOM(Reader reader) throws Exception {
        while (true) {
            // Remember the position so the '<' itself can be un-read.
            reader.mark(1);
            int next = reader.read();
            if (next == -1) {
                return reader;
            }
            if (next == '<') {
                reader.reset();
                return reader;
            }
        }
    }

}