如何使用 Java 将 UCS-2 编码的 XML 文件转换为 UTF-8 或 UTF-16

时间:2016-09-23 08:34:25

标签: java xml encoding utf-8 utf-16

我收到的是 UCS-2 编码的 XML 文件。我想使用 Java 代码将其转换为 UTF-8、UTF-16 或 ANSI 编码。

你能帮忙吗?

1 个答案:

答案 0 :(得分:0)

我做过类似的事情,下面是我的实现(我删掉了一些方法,但对你的用例来说已经足够)。顺便说一下,据我所知,对于基本多文种平面(BMP)内的字符,UCS-2 与 UTF-16 的编码结果相同(前提是字节顺序一致),因此可以把 UCS-2 文件按 UTF-16BE 或 UTF-16LE 读取,再以目标编码写出。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;



/**
 * Text encodings known to the converter.
 *
 * <p>Each constant carries an explicit integer code, exposed through
 * {@link #getIntValue()}. {@code TextConverter} uses that code as an index
 * into its {@code encodingNames} array, so the codes must stay in step with
 * the order of that array.
 */
enum EncodingType {
    UTF8(0),
    UTF16BE(1),
    UTF16LE(2),
    ISO_8859_1(3),
    ISO_8859_2(4),
    UNKNOWN(5);

    /** Integer code attached to this constant. */
    private final int code;

    /**
     * Creates a constant with the given integer code.
     *
     * @param code the integer code to attach to the constant
     */
    EncodingType(final int code) {
        this.code = code;
    }

    /**
     * Returns the integer code attached to this constant.
     *
     * @return the integer code given when the constant was declared
     */
    public int getIntValue() {
        return this.code;
    }
}

/**
 * Reads a text file into a {@code String} using a configurable input encoding
 * and writes a {@code String} to a file using a configurable output encoding.
 *
 * <p>Converting a file from one encoding to another is therefore: set the
 * input encoding, call {@link #readFile(String)}, set the output encoding and
 * call {@link #writeFile(String, String)}. Both encodings default to
 * {@link EncodingType#UTF8}.
 */
public class TextConverter{

    /** Not read or written by any method of this class; kept so existing callers still compile. */
    public  EncodingType encodingType;
    private EncodingType inputEncoding = EncodingType.UTF8;
    private EncodingType outputEncoding = EncodingType.UTF8;

    /**
     * Charset names, indexed by {@link EncodingType#getIntValue()}.
     * The last entry, {@code "UNKNOWN"}, is a placeholder rather than a real
     * charset name; the JVM is not expected to recognise it, so reading or
     * writing with {@link EncodingType#UNKNOWN} fails with an
     * {@link IOException}.
     */
    public final static String[] encodingNames = { "UTF-8","UTF-16BE","UTF-16LE", "ISO-8859-1","ISO-8859-2", "UNKNOWN" };

    // The check methods are only useful for guessing a file's encoding from its
    // leading bytes (the byte order mark). Do not rely on them alone: not every
    // encoding writes such header bytes, and a file's encoding can be changed.

    /**
     * Tells whether the given bytes start with the UTF-8 byte order mark (EF BB BF).
     *
     * @param header the first bytes of a file
     * @return {@code true} if at least three bytes are present and they are EF BB BF
     */
    private final static boolean checkUTF8(byte[] header){
        return header != null && header.length >= 3
                && (header[0]&0xFF)==0xEF && (header[1]&0xFF)==0xBB && (header[2]&0xFF)==0xBF;
    }

    /**
     * Tells whether the given bytes start with the UTF-16 big-endian byte order mark (FE FF).
     *
     * @param header the first bytes of a file
     * @return {@code true} if at least two bytes are present and they are FE FF
     */
    private final static boolean checkUTF16BE(byte[] header){
        return header != null && header.length >= 2
                && (header[0]&0xFF)==0xFE && (header[1]&0xFF)==0xFF;
    }

    /**
     * Tells whether the given bytes start with the UTF-16 little-endian byte order mark (FF FE).
     *
     * @param header the first bytes of a file
     * @return {@code true} if at least two bytes are present and they are FF FE
     */
    private final static boolean checkUTF16LE(byte[] header){
        // The second byte must be masked with 0xFF: masking with 0xFE would
        // also accept FF FF, which is not a byte order mark.
        return header != null && header.length >= 2
                && (header[0]&0xFF)==0xFF && (header[1]&0xFF)==0xFE;
    }

    /**
     * @return the encoding used by {@link #readFile(String)}
     */
    public EncodingType getInputEncoding(){
        return inputEncoding;
    }

    /**
     * @return the encoding used by {@link #writeFile(String, String)}
     */
    public EncodingType getOutputEncoding(){
        return outputEncoding;
    }

    /**
     * Sets the encoding used by {@link #readFile(String)}.
     *
     * @param enc the encoding the input file is stored in
     */
    public void setInputEncoding(EncodingType enc){
        this.inputEncoding = enc;
    }

    /**
     * Sets the encoding used by {@link #writeFile(String, String)}.
     *
     * @param enc the encoding the output file should be stored in
     */
    public void setOutputEncoding(EncodingType enc){
        this.outputEncoding = enc;
    }

    /**
     * Writes a string to a file using the encoding held in the
     * {@code outputEncoding} member (see {@link #setOutputEncoding(EncodingType)}).
     * An existing file of the same name is overwritten.
     *
     * @param fileName path of the file to write
     * @param content  text to store in the file
     * @throws IOException if the file cannot be opened or written, or if the
     *                     configured encoding is not supported; failures are
     *                     reported to the caller instead of being printed and ignored
     */
    public void writeFile(String fileName, String content)throws IOException{
        String encoding = encodingNames[outputEncoding.getIntValue()];

        // The stream is declared as its own resource so it is closed even when
        // the writer cannot be created (for example an unsupported encoding).
        // Resources close in reverse order: the writer flushes first, then the stream.
        try (FileOutputStream out = new FileOutputStream(new File(fileName));
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, encoding))) {
            bw.write(content);
        }
    }

    /**
     * Reads a file and decodes it to a string using the encoding held in the
     * {@code inputEncoding} member; use {@link #setInputEncoding(EncodingType)}
     * to choose it.
     *
     * <p>The file is read line by line and every line, including the last one,
     * is followed by the platform line separator in the returned string. The
     * original line terminators of the file are therefore not preserved.
     *
     * @param fileName path of the file to read
     * @return the decoded text of the file, one platform line separator after each line
     * @throws IOException if the file cannot be opened or read, or if the
     *                     configured encoding is not supported; failures are
     *                     reported to the caller instead of yielding empty or
     *                     partial text
     */
    public  String readFile(String fileName) throws IOException{
        String encoding = encodingNames[inputEncoding.getIntValue()];
        String del = System.getProperty("line.separator");

        // StringBuilder keeps the accumulation linear in the size of the file.
        StringBuilder fileContent = new StringBuilder();

        // The stream is declared as its own resource so it is closed even when
        // the reader cannot be created (for example an unsupported encoding).
        try (FileInputStream in = new FileInputStream(new File(fileName));
             BufferedReader br = new BufferedReader(new InputStreamReader(in, encoding))) {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                fileContent.append(line).append(del);
            }
        }

        return fileContent.toString();
    }

}

您还可以将所有成员设置为静态,因为这可能对您更有意义。当然,您可以以任何合适的方式修改此代码。