如何用Java读取多种语言的文件?

时间:2012-07-05 07:12:46

标签: java localization

我正在尝试用Java阅读和处理几种语言的词典。那么如何根据这个安排我的代码呢?感谢。

要将它们变为大写,我使用了这个:

     String str_uc=str.toUpperCase(Locale.ENGLISH);

但它不支持其他语言,我正在尝试阅读。

然而,主要问题是我在将它们变成大写之前无法正确读取其他语言的文件。

这是我到目前为止所做的。适用于英语词典。

  import java.util.ArrayList;
  import java.util.Locale;
  import java.io.*;
  public class XmlCreating {


static ArrayList<Character> keywordletters = new ArrayList<Character>();
static ArrayList<Character> wordletters = new ArrayList<Character>();
static ArrayList<String> threeletter = new ArrayList<String>();
static ArrayList<String> fourletter = new ArrayList<String>();
static ArrayList<String> fiveletter = new ArrayList<String>();
static ArrayList<String> sixletter = new ArrayList<String>();
static ArrayList<String> sevenletter = new ArrayList<String>();
static ArrayList<String> words = new ArrayList<String>();
static ArrayList<String> allletters = new ArrayList<String>();

public static boolean hasApostrophe(String line){
    for(int i=0; i<line.length();i++) {
        if(line.charAt(i)=='\'' || line.charAt(i)=='-' )
          return false;         
    }
    return true;        
}



public static void findLetters(String word, ArrayList<Character> ary) {
    for(int i=0; i<word.length(); i++) {
        ary.add(word.charAt(i));    
    }
}

public static boolean consistLetters(String keyword,String word) {


    keywordletters.clear();
    wordletters.clear();
    findLetters(keyword,keywordletters);
    findLetters(word,wordletters);

    boolean found = false;
    for(int i=0; i<wordletters.size(); i++) {
        found=false;
        for(int j=0; j<keywordletters.size(); j++) {

            if(keywordletters.get(j)!='\''){
                if(wordletters.get(i)==keywordletters.get(j)) {
                    keywordletters.set(j,'\'');
                    found=true;
                    break;
                }           
            }
        }

        if(found!=true)
            return false;
    }

    return found;
}



public static void findWords(String keyword){

          words.clear();

          for(int i=0; i<threeletter.size(); i++)
          {
            if(consistLetters(keyword,threeletter.get(i))==true) 
              words.add(threeletter.get(i));

          }
          for(int i=0; i<fourletter.size(); i++){
              if(consistLetters(keyword,fourletter.get(i))==true) 
                  words.add(fourletter.get(i));
          }

          for(int i=0; i<fiveletter.size(); i++){
              if(consistLetters(keyword,fiveletter.get(i))==true) 
                  words.add(fiveletter.get(i)); 
          }

          for(int i=0; i<sixletter.size(); i++){
              if(consistLetters(keyword,sixletter.get(i))==true) 
                  words.add(sixletter.get(i));
          }

          for(int i=0; i<sevenletter.size(); i++){
              if(consistLetters(keyword,sevenletter.get(i))==true) 
                  words.add(sevenletter.get(i));
          }   

    }


public static void main(String args[]) { 

    //Locale.setDefault(new Locale("tr","TR"));
    try {
      FileInputStream fstream1 = new FileInputStream("en-GB.dic");
      DataInputStream in = new DataInputStream(fstream1);
      BufferedReader br = new BufferedReader(new InputStreamReader(in));
      String str;
      while ((str = br.readLine()) != null) {
         String str_uc=str.toUpperCase(Locale.ENGLISH);

          if(hasApostrophe(str_uc)){
              allletters.add(str_uc);
          if(str.length()==3)
              threeletter.add(str_uc);
          else if(str.length()==4)
              fourletter.add(str_uc);
          else if(str.length()==5)
              fiveletter.add(str_uc);
          else if(str.length()==6)
              sixletter.add(str_uc);
          else if(str.length()==7)
              sevenletter.add(str_uc);
       }
      }
      in.close();

    }
    catch (Exception e) {
      System.err.println(e);
    }

    System.out.println(sevenletter.size());
    System.out.println(sixletter.size());
    System.out.println(fiveletter.size());
    System.out.println(allletters.size());


    int noOfXml=(int)(sevenletter.size()/10);
    int lastXml=(int)(sevenletter.size()%10);



    try{

        int a=0;
        int b=10;  
        for(int x=1;x<noOfXml+1;x++) {
        FileWriter fstream2 = new FileWriter(x+".xml");
        BufferedWriter out = new BufferedWriter(fstream2);

        out.write("<?xml version='1.0' encoding='utf-8' ?><dictionary>");


        for(int i=a;i<b;i++) {
              findWords(sevenletter.get(i));

              out.write("<ltr s='"+sevenletter.get(i)+"' w=");

              for(int j=0; j<words.size();j++) {

                  out.write("'"+words.get(j)+"'");
                  if(j<words.size()-1)
                      out.write(";");
                }
              out.write("/>");
        }
        a=b;
        b=b+10;


        out.write("</dictionary>");
        //Close the output stream
        out.close();

        }}catch (Exception e){
        System.err.println("Error: " + e.getMessage());
        }


    //for last five keywords
    if(lastXml!=0) {
        try{
                FileWriter fstream3 = new FileWriter((noOfXml+1)+".xml");
                BufferedWriter out1 = new BufferedWriter(fstream3);

                out1.write("<?xml version='1.0' encoding='utf-8' ?><dictionary>");


                for(int i=sevenletter.size()-lastXml;i<sevenletter.size();i++) {
                    findWords(sevenletter.get(i));

                    out1.write("<ltr s='"+sevenletter.get(i)+"' w=");

                    for(int j=0; j<words.size();j++) {

                        out1.write("'"+words.get(j)+"'");
                        if(j<words.size()-1)
                            out1.write(";");
                    }
                    out1.write("/>");
                }
                out1.write("</dictionary>");
                //Close the output stream
                out1.close(); 
        }
        catch (Exception e){
        System.err.println("Error: " + e.getMessage());
            }

      }

   }//main
   }//class

2 个答案:

答案 0 :(得分:2)

我将每个文件编码为UTF-8

对于每个语言,我会将每个字典读成Set<String>可能是NavigableSet<String>,我会将这些字典放在一个由该语言键入的地图上。

答案 1 :(得分:0)

另一种方法是在地图中使用单词作为键:

HashMap<(String) word ,Set<(String) codeLanguage>>

并且正如Peter Lawrey所说,在同一张地图中

 <(String) codeLanguage, Set(String) allLanguageWorld>>

如果使用东方语言,则必须使用Unicode。