我正在获取UCS-2编码的XML文件。我想使用Java代码将此编码转换为UTF-8、UTF-16或ANSI。
你能帮忙吗?
答案 0 :(得分:0)
我必须做类似的事情,这就是我提出的(我删除了一些方法,但这对你的用例来说已经足够了)。顺便说一下,据我所知,对于基本多文种平面(BMP)内的字符,UCS-2与UTF-16的编码完全相同(假设字节顺序相同);区别在于UTF-16还可以用代理对表示BMP之外的字符。因此可以把UCS-2文件当作UTF-16BE或UTF-16LE来读取。
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
/**
 * Text encodings known to the converter.
 *
 * <p>Every constant carries a fixed integer code, available through
 * {@link #getIntValue()}. {@code TextConverter} uses that code as an index
 * into its {@code encodingNames} array, so the codes must stay aligned with
 * the order of that array.
 */
enum EncodingType {
    UTF8(0),
    UTF16BE(1),
    UTF16LE(2),
    ISO_8859_1(3),
    ISO_8859_2(4),
    UNKNOWN(5);

    /** Integer code given to this constant at declaration. */
    private final int code;

    EncodingType(int val) {
        this.code = val;
    }

    /**
     * Returns the integer code of this constant.
     *
     * @return the value that was passed to the constructor for this constant
     */
    public int getIntValue() {
        return this.code;
    }
}
/**
 * Reads and writes text files using configurable input and output encodings.
 *
 * <p>To convert a file, set the input encoding, call {@link #readFile(String)},
 * set the output encoding, then pass the returned text to
 * {@link #writeFile(String, String)}.
 */
public class TextConverter {
    // Public mutable field kept for backward compatibility; nothing in this class uses it.
    public EncodingType encodingType;
    private EncodingType inputEncoding = EncodingType.UTF8;
    private EncodingType outputEncoding = EncodingType.UTF8;

    /** Charset names, indexed by {@link EncodingType#getIntValue()}. */
    public final static String[] encodingNames = { "UTF-8","UTF-16BE","UTF-16LE", "ISO-8859-1","ISO-8859-2", "UNKNOWN" };

    // The check methods below only help when guessing a file's encoding from its
    // leading bytes. Do not rely on them alone: not every encoding writes header
    // bytes, and the encoding of a file can be changed.

    /**
     * Tells whether the given bytes start with the UTF-8 byte-order mark (EF BB BF).
     *
     * @param header leading bytes of a file
     * @return {@code true} if the mark is present; {@code false} otherwise,
     *         including when {@code header} is {@code null} or shorter than 3 bytes
     */
    private static boolean checkUTF8(byte[] header) {
        return header != null && header.length >= 3
                && (header[0] & 0xFF) == 0xEF
                && (header[1] & 0xFF) == 0xBB
                && (header[2] & 0xFF) == 0xBF;
    }

    /**
     * Tells whether the given bytes start with the UTF-16 big-endian byte-order mark (FE FF).
     *
     * @param header leading bytes of a file
     * @return {@code true} if the mark is present; {@code false} otherwise,
     *         including when {@code header} is {@code null} or shorter than 2 bytes
     */
    private static boolean checkUTF16BE(byte[] header) {
        return header != null && header.length >= 2
                && (header[0] & 0xFF) == 0xFE
                && (header[1] & 0xFF) == 0xFF;
    }

    /**
     * Tells whether the given bytes start with the UTF-16 little-endian byte-order mark (FF FE).
     *
     * @param header leading bytes of a file
     * @return {@code true} if the mark is present; {@code false} otherwise,
     *         including when {@code header} is {@code null} or shorter than 2 bytes
     */
    private static boolean checkUTF16LE(byte[] header) {
        // The second byte must be masked with 0xFF; masking with 0xFE would also accept FF FF.
        return header != null && header.length >= 2
                && (header[0] & 0xFF) == 0xFF
                && (header[1] & 0xFF) == 0xFE;
    }

    /** @return the encoding used by {@link #readFile(String)} */
    public EncodingType getInputEncoding() {
        return inputEncoding;
    }

    /** @return the encoding used by {@link #writeFile(String, String)} */
    public EncodingType getOutputEncoding() {
        return outputEncoding;
    }

    /** @param enc the encoding that {@link #readFile(String)} should decode with */
    public void setInputEncoding(EncodingType enc) {
        this.inputEncoding = enc;
    }

    /** @param enc the encoding that {@link #writeFile(String, String)} should encode with */
    public void setOutputEncoding(EncodingType enc) {
        this.outputEncoding = enc;
    }

    /**
     * Maps an encoding constant to a {@link Charset} through {@link #encodingNames}.
     *
     * @param type the encoding constant to resolve
     * @return the matching charset
     * @throws IOException if {@code type} is {@code null} or its name is not a
     *         charset supported by this JVM (for example {@code UNKNOWN})
     */
    private static Charset charsetFor(EncodingType type) throws IOException {
        if (type == null) {
            throw new IOException("No encoding configured");
        }
        String name = encodingNames[type.getIntValue()];
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException e) {
            // Covers both unsupported and syntactically illegal charset names.
            throw new IOException("Unsupported encoding: " + name, e);
        }
    }

    /**
     * Writes a string to a file using the encoding held in the output encoding
     * member (see {@link #setOutputEncoding(EncodingType)}).
     *
     * <p>The encoding is resolved before the file is opened, so an unsupported
     * encoding does not create or truncate the target file.
     *
     * @param fileName path of the file to create or overwrite
     * @param content text to write; must not be {@code null}
     * @throws IOException if the encoding is unsupported or the file cannot be
     *         opened, written or closed
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public void writeFile(String fileName, String content) throws IOException {
        if (content == null) {
            // Fail before touching the file so a bad call cannot wipe existing data.
            throw new NullPointerException("content must not be null");
        }
        Charset charset = charsetFor(outputEncoding);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(fileName)), charset))) {
            writer.write(content);
        }
    }

    /**
     * Reads a file into a string using the encoding held in the input encoding
     * member; use {@link #setInputEncoding(EncodingType)} to choose it.
     *
     * <p>The file is read line by line and every line, including the last one,
     * is followed by the platform line separator in the returned text.
     *
     * @param fileName path of the file to read
     * @return the decoded file content, or an empty string for an empty file
     * @throws IOException if the encoding is unsupported or the file cannot be
     *         opened, read or closed
     */
    public String readFile(String fileName) throws IOException {
        Charset charset = charsetFor(inputEncoding);
        String lineSeparator = System.getProperty("line.separator");
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(fileName)), charset))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                text.append(line).append(lineSeparator);
            }
        }
        return text.toString();
    }
}
您还可以将所有成员设置为静态,因为这可能对您更有意义。当然,您可以以任何合适的方式修改此代码。