将Document转换为CLOB时,XML中的特殊字符被转成了数字字符引用

时间:2016-11-08 08:44:10

标签: java xml oracle domdocument clob

我把保存在CLOB中的XML从Oracle传给Java存储过程(java source),在Java中对XML签名后返回结果,但返回的结果中特殊字符被替换成了数字字符引用。例如:

输入xml:

<a>žė</a>

输出xml:

<a>&#382;&#279;</a>

如果我在Java中直接打印结果,特殊字符显示正常;但在Oracle中得到的却是上面这种数字字符引用。如果我直接从输入CLOB取得字符流并写入结果CLOB,就不会出现这些引用;它们只在我把Document转换为CLOB时才会出现。

重现的代码:

create or replace and compile java source named test_encoding as
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.Writer;
import java.io.StringWriter;
import java.security.*;
import java.security.cert.X509Certificate;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import sun.misc.BASE64Encoder;
import sun.misc.BASE64Decoder;

/**
 * Java stored-procedure helper that parses XML held in a CLOB into a DOM
 * {@link Document}, serializes it again and writes the result into a second CLOB.
 *
 * <p>Every byte/character conversion performed by this class uses UTF-8.
 *
 * <p>NOTE(review): on the Oracle database JVM the default {@code Transformer} has been
 * reported to emit numeric character references (for example {@code &#382;}) for
 * non-ASCII characters even when the output encoding is set to UTF-8. Setting the
 * encoding here keeps the produced bytes consistent with how they are decoded, but it
 * is not confirmed to change that Oracle-specific behaviour.
 */
public class test_encoding {

  /** Charset name used for all byte/character conversions in this class. */
  private static final String UTF_8 = "UTF-8";

  /**
   * Parses the XML in {@code inputClob}, serializes the resulting document and writes
   * the serialized text into {@code outputClob}, starting at position 1.
   *
   * @param inputClob  CLOB containing the XML to parse
   * @param outputClob CLOB that receives the serialized XML
   * @return {@code outputClob}, after the text has been written to it
   * @throws Exception if reading, parsing, serializing or writing fails
   */
  public static Clob getxml(Clob inputClob, Clob outputClob) throws Exception {
    Document document = getDocument(inputClob);
    String xml = getStringFromIS(getInputStreamFromDocument(document));

    Writer writer = outputClob.setCharacterStream(1);
    try {
      writer.write(xml);
    } finally {
      // Close even when write() fails so the CLOB stream is never left open.
      writer.close();
    }
    return outputClob;
  }

  /**
   * Reads {@code reader} to its end and returns the characters as a UTF-8 encoded
   * in-memory stream. The reader is not closed; that stays the caller's job.
   *
   * @param reader character source to drain
   * @return stream over the UTF-8 bytes of everything read
   * @throws Exception if reading fails
   */
  public static InputStream readerToInputStream(Reader reader) throws Exception {
    return new ByteArrayInputStream(readAll(reader).getBytes(UTF_8));
  }

  /**
   * Decodes the whole stream as UTF-8 text and closes it.
   *
   * <p>The text is returned exactly as decoded, including line terminators. Reading
   * line by line would drop them and thereby alter the XML content.
   *
   * @param is byte stream holding UTF-8 encoded text
   * @return the decoded text; empty when the stream is empty
   * @throws Exception if reading fails
   */
  public static String getStringFromIS(InputStream is) throws Exception {
    Reader in = new BufferedReader(new InputStreamReader(is, UTF_8));
    try {
      return readAll(in);
    } finally {
      in.close();
    }
  }

  /**
   * Parses the XML text of {@code xmlClob} into a namespace-aware DOM document.
   *
   * @param xmlClob CLOB containing the XML text
   * @return the parsed document
   * @throws Exception if the CLOB cannot be read or the XML cannot be parsed
   */
  public static Document getDocument(Clob xmlClob) throws Exception {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);

    Reader reader = xmlClob.getCharacterStream();
    try {
      return dbf.newDocumentBuilder().parse(readerToInputStream(reader));
    } finally {
      reader.close();
    }
  }

  /**
   * Serializes {@code element} (and its subtree) to UTF-8 encoded XML.
   *
   * @param element root of the subtree to serialize
   * @return in-memory stream over the serialized bytes
   * @throws Exception if the transformation fails
   */
  public static InputStream getInputStreamFromElement(Element element) throws Exception {
    return serialize(element);
  }

  /**
   * Serializes the whole {@code document} to UTF-8 encoded XML.
   *
   * @param document document to serialize
   * @return in-memory stream over the serialized bytes
   * @throws Exception if the transformation fails
   */
  public static InputStream getInputStreamFromDocument(Document document) throws Exception {
    return serialize(document);
  }

  /** Runs an identity transform of {@code node} into a UTF-8 byte buffer. */
  private static InputStream serialize(Node node) throws Exception {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    // Made explicit because the bytes are decoded as UTF-8 by getStringFromIS.
    transformer.setOutputProperty(OutputKeys.ENCODING, UTF_8);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    transformer.transform(new DOMSource(node), new StreamResult(out));
    return new ByteArrayInputStream(out.toByteArray());
  }

  /** Drains {@code reader} into a string without altering any character. */
  private static String readAll(Reader reader) throws Exception {
    StringBuilder text = new StringBuilder();
    char[] buffer = new char[8 * 1024];
    int count;
    while ((count = reader.read(buffer, 0, buffer.length)) != -1) {
      text.append(buffer, 0, count);
    }
    return text.toString();
  }

}

plsql函数:

-- Call specification: publishes the static Java method test_encoding.getxml
-- as a PL/SQL function that takes two CLOBs and returns a CLOB.
create or replace function test_encoding(
  p_input_clob  in clob,
  p_output_clop in clob
) return clob
is language java
  name 'test_encoding.getxml(java.sql.Clob, java.sql.Clob) return java.sql.Clob';

脚本:

declare
  -- Sample XML containing two non-ASCII characters, and the CLOB that receives
  -- the value returned by test_encoding.
  v_source_xml clob;
  v_result_xml clob;
begin
  v_source_xml := xmlType('<a>žė</a>').getClobVal();
  dbms_lob.createtemporary(lob_loc => v_result_xml, cache => false);

  -- Print the XML before and after the round trip through the Java method.
  dbms_output.put_line(v_source_xml);
  v_result_xml := test_encoding(v_source_xml, v_result_xml);
  dbms_output.put_line(v_result_xml);
end;
/

更新

我已经精确定位到字符“žė”被转换为数字字符引用(&#…;)的位置:就是在对Document做转换(transform)的时候。我把Document转换为byte[]并打印其十六进制表示,此时数字字符引用已经存在。我在Eclipse中做了同样的操作,那里没有出现字符引用。例如:

Result in oracle:  3C613E26233338323B26233237393B3C2F613E
Result in eclipse: 3C613EC5BEC4973C2F613E

我使用完全相同的函数将文档转换为byte []并打印出Oracle和Eclipse中的字节数组的十六进制表示。

将Document转换为byte []的函数:

/**
 * Serializes a DOM document to bytes with an identity transform.
 *
 * <p>The transformer is asked for UTF-8 output and for the XML declaration to be
 * omitted.
 *
 * @param doc document to serialize
 * @return the bytes written by the transformer
 * @throws Exception if the transformer cannot be created or the transform fails
 */
public static byte[] getDocumentByteArray(Document doc) throws Exception {
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    serializer.transform(new DOMSource(doc), new StreamResult(sink));
    return sink.toByteArray();
}

/**
 * Renders a byte array as an upper-case hexadecimal string, two digits per byte.
 *
 * @param bytes bytes to render
 * @return hexadecimal text of length {@code bytes.length * 2}
 */
public static String bytesToHex(byte[] bytes) {
    final String digits = "0123456789ABCDEF";
    StringBuilder hex = new StringBuilder(bytes.length * 2);
    for (byte b : bytes) {
        // Mask after shifting so a negative byte yields only its high nibble.
        hex.append(digits.charAt((b >> 4) & 0x0F));
        hex.append(digits.charAt(b & 0x0F));
    }
    return hex.toString();
}

可能是Transformer采用了运行环境的区域/字符集设置:在Eclipse中它使用我本机的NLS_LANG,而在Oracle中使用的是Oracle服务器的NLS_LANG。如果是这样,我该如何指定使用哪种编码?因为下面这行代码似乎

transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

没有做到这一点。

1 个答案:

答案 0 :(得分:0)

在Oracle中使用Transformer时,它似乎存在一些缺陷。我们通过使用DOMParser、XMLDocument和XMLElement找到了解决方法。如果用这些类重写全部代码的工作量太大,可以只在返回值之前用它们对XML再转换一次。例如:

...
// Serialize the element to an in-memory byte stream.
InputStream elementIS = getInputStreamFromElement(soapEnvelope);
// Re-parse those bytes and print the root element into the CLOB that is returned.
retClob = setCharacterStream(elementIS, retClob);

return retClob;

/**
 * Serializes a DOM element and its subtree with an identity transform, using the
 * transformer's default output properties.
 *
 * @param element root of the subtree to serialize
 * @return in-memory stream over the bytes produced by the transformer
 * @throws Exception if the transformer cannot be created or the transform fails
 */
public static InputStream getInputStreamFromElement(Element element) throws Exception {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    TransformerFactory.newInstance()
        .newTransformer()
        .transform(new DOMSource(element), new StreamResult(buffer));
    return new ByteArrayInputStream(buffer.toByteArray());
}

/**
 * Parses XML from {@code inputStream} and prints its document element into
 * {@code clob}, starting at position 1.
 *
 * <p>NOTE(review): {@code DOMParser}, {@code XMLDocument} and {@code XMLElement} are
 * not declared in this snippet; presumably they are the Oracle XDK classes from
 * {@code oracle.xml.parser.v2} - confirm the import.
 *
 * @param inputStream stream holding the XML to parse
 * @param clob        CLOB that receives the printed element
 * @return {@code clob}, after the element has been written to it
 * @throws Exception if parsing or writing fails
 */
public static Clob setCharacterStream(InputStream inputStream, Clob clob) throws Exception{
    DOMParser parser = new DOMParser();
    parser.parse(inputStream);
    XMLDocument xmlDocument = parser.getDocument();
    XMLElement xmlPayload = (XMLElement) xmlDocument.getDocumentElement();

    Writer writer = clob.setCharacterStream(1);
    try {
        xmlPayload.print(writer);
        // Push any buffered characters into the CLOB before it is returned.
        writer.flush();
    } finally {
        // Always release the CLOB stream, even when printing fails.
        writer.close();
    }

    return clob;
}