我如何在java中阅读äöüß?

时间:2013-04-12 17:56:38

标签: java

   import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class Test {

    List<String> knownWordsArrayList = new ArrayList<String>();
    List<String> wordsArrayList = new ArrayList<String>();
    List<String> newWordsArrayList = new ArrayList<String>();
    String toFile = "";

    public void readKnownWordsFile() {
        try {
            FileInputStream fstream2 = new FileInputStream("knownWords.txt");

            BufferedReader br2 = new BufferedReader(new InputStreamReader(fstream2, "UTF-8"));
            String strLine;
            while ((strLine = br2.readLine()) != null) {
                knownWordsArrayList.add(strLine.toLowerCase());
            }
            HashSet h = new HashSet(knownWordsArrayList);
            // h.removeAll(knownWordsArrayList);
            knownWordsArrayList = new ArrayList<String>(h);
            // for (int i = 0; i < knownWordsArrayList.size(); i++) {
            // System.out.println(knownWordsArrayList.get(i));
            // }
        } catch (Exception e) {
            // TODO: handle exception
        }

    }

    public void readFile() {
        try {
            // Open the file that is the first
            // command line parameter
            FileInputStream fstream = new FileInputStream("Smallville 4x02.de.srt");

            BufferedReader br = new BufferedReader(new InputStreamReader(fstream));

            String strLine;

            String numberedLineRemoved = "";
            String strippedInput = "";
            String[] words;
            String trimmedString = "";
            String temp = "";
            // Read File Line By Line
            while ((strLine = br.readLine()) != null) {
                temp = strLine.toLowerCase();
                // Print the content on the console
                numberedLineRemoved = numberedLine(temp);
                strippedInput = numberedLineRemoved.replaceAll("\\p{Punct}", "");
                if ((strippedInput.trim().length() != 0) || (!strippedInput.contains("")) || (strippedInput.contains(" "))) {
                    words = strippedInput.split("\\s+");
                    for (int i = 0; i < words.length; i++) {
                        if (words[i].trim().length() != 0) {
                            wordsArrayList.add(words[i]);
                        }
                    }
                }
            }

            HashSet h = new HashSet(wordsArrayList);
            h.removeAll(knownWordsArrayList);
            newWordsArrayList = new ArrayList<String>(h);

            // HashSet h = new HashSet(wordsArrayList);
            // wordsArrayList.clear();
            // newWordsArrayList.addAll(h);

            for (int i = 0; i < newWordsArrayList.size(); i++) {
                toFile = newWordsArrayList.get(i) + ".\n";
//              System.out.println(newWordsArrayList.get(i) + ".");
                System.out.println();
            }

            System.out.println(newWordsArrayList.size());
            // Close the input stream
            in.close();
        } catch (Exception e) {// Catch exception if any
            System.err.println("Error: " + e.getMessage());
        }
    }

    public String numberedLine(String string) {
        if (string.matches(".*\\d.*")) {
            return "";
        } else {
            return string;
        }
    }

    public void writeToFile() {
        try {
            // Create file
            FileWriter fstream = new FileWriter("out.txt");
            BufferedWriter out = new BufferedWriter(fstream);
            out.write(toFile);
            // Close the output stream
            out.close();
        } catch (Exception e) {// Catch exception if any
            System.err.println("Error: " + e.getMessage());
        }
    }

    public static void main(String[] args) {
        Test test = new Test();
        test.readKnownWordsFile();
        test.readFile();
        test.writeToFile();

    }

}

如何从文件中阅读äöüß? string.toLowercase()也能正确处理这些吗? 当我打印包含任何äöüß的单词时,我该如何正确打印这个单词? 当我打印到控制台时,我得到了 Außerdem weiß 对于Außerdem 魏斯 我该如何解决这个问题?

我试过了:

BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));

但现在我得到了aufkl?ren而不是aufklären,以及其他地方的混乱。

更新了代码以查看它是否会正确打印在文件上,但我只是在文件中找到一个。

1 个答案:

答案 0 :(得分:1)

您需要使用用于创建文件的字符集来读取文件。如果你在Windows机器上,那可能是cp1252。所以:

BufferedReader br = new BufferedReader(new InputStreamReader(in, "Cp1252"));

如果这不起作用,大多数文本编辑器都能告诉您给定文档使用的编码。