在java中计算立即重复的字母

时间:2018-01-02 22:01:16

标签: java

我正在尝试编写一个代码来读取文件中的句子并计算重复的字母并将其作为字符串存储在另一个文件中作为输出。

问题是代码只计算一个重复的字母,而不会存储其他字母......

Repeat.java

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Formatter;
import java.util.FormatterClosedException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.HashMap;
import java.util.Map;

public class Repeat {

    public static String text;
    static String vector = "";

    public static void main(String[] args) throws IOException {
        // TODO Auto-generated method stub
        Formatter output;

        try {
            Path p = Paths.get("G:\\sarcasm\\out2.txt").toAbsolutePath();
            output = new Formatter(p.toString(), "utf-8");
            Scanner input = new Scanner(Paths.get("G:\\sarcasm\\repeatinput.txt"), "utf-8");

            while (input.hasNextLine()) {
                String line = input.nextLine();
                String lineout = new String();
                List<String> filtered = Arrays.asList(line.split("\\s"));
                String result = "";

                //loop through each word in the sentence, peeking at the next and comparing it with the current word
                Map<String, Integer> letterCount = new HashMap();
                int sLen = line.length();
                for (int i = 0; i < sLen; i++) {
                    String currLetter = String.valueOf(line.charAt(i));
                    String nextLetter = i + 1 < sLen ? String.valueOf(line.charAt(i + 1)) : "";
                    // check the next word, if it matches, update the map count
                    if (currLetter.equalsIgnoreCase(nextLetter)) {
                        if (!letterCount.containsKey(currLetter)) {
                            letterCount.put(currLetter, 1);
                        } else {
                            letterCount.put(currLetter, (letterCount.get(currLetter) + 1));
                        }
                    }
                }
                for (String key : letterCount.keySet()) {
                    result += " Letter " + key + " appears " + letterCount.get(key) + " times";
                    if (key.equals("ا")) {
                        vector = letterCount.get(key) + ", ";
                    } else {
                        vector = "0, ";
                    }
                    if (key.equals("ب")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ت")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ث")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ج")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ح")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("خ")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("د")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ذ")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ر")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ز")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("س")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ش")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ص")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ض")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ط")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ظ")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ع")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("غ")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ف")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ق")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ك")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ل")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("م")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ن")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ه")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("و")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("ي")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("!")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals("?")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals(".")) {
                        vector += letterCount.get(key) + ", ";
                    } else {
                        vector += "0, ";
                    }
                    if (key.equals(",")) {
                        vector += letterCount.get(key) + ".";
                    } else {
                        vector += "0.";
                    }

                }


                output.format("%s\n", vector);
            }
            input.close();
            output.close();
        } catch (NoSuchElementException excp) {
            System.err.println(excp.getMessage());
        } catch (FormatterClosedException excp) {
            System.err.println(excp.getMessage());
        } catch (IllegalStateException excp) {
            System.err.println(excp.getMessage());
        }
        System.exit(0);

    }
}

,输入文件为:

  

اهلاوسهل​​اااابكمم!!

     

يااهلااااومرحببااا

     

كككيفكم???

     

هههههههههههههه

我得到的输出是:

  

3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ,0,0,0,0,0,0,0,0。

     

0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ,0,0,0,0,0,0,0,0。

     

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ,0,0,0,0,0,2,0,0。

     

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ,0,13,0,0,0,0,0,0。

它应该在哪里:

  

3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 ,0,0,0,0,1,0,0,0。

     

5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ,0,0,0,0,0,0,0,0。

     

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ,0,0,0,0,0,2,0,0。

     

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ,0,13,0,0,0,0,0,0。

我怎么能解决这个问题?

1 个答案:

答案 0 :(得分:1)

看起来你让它变得复杂了。我删除了所有不必要的代码,并使用2个列表编写了一个干净的解决方案。

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class Repeat {

    public static String text;
    static String vector = "";

    public static void main(String[] args) throws IOException {
        String line = "اهلا وسهلاااا بكمم!!";
        List<String> vals = new ArrayList<>();
        vals.add("ا");
        vals.add("ب");
        vals.add("ت");
        vals.add("ث");
        vals.add("ج");
        vals.add("ح");
        vals.add("خ");
        vals.add("د");
        vals.add("ذ");
        vals.add("ر");
        vals.add("ز");
        vals.add("س");
        vals.add("ش");
        vals.add("ص");
        vals.add("ض");
        vals.add("ط");
        vals.add("ظ");
        vals.add("ع");
        vals.add("غ");
        vals.add("ف");
        vals.add("ق");
        vals.add("ك");
        vals.add("ل");
        vals.add("م");
        vals.add("ن");
        vals.add("ه");
        vals.add("و");
        vals.add("ي");
        vals.add("!");
        vals.add("?");
        vals.add(".");
        vals.add(",");

        List<Integer> results = new ArrayList<>(vals.size());

        for (int i = 0; i < vals.size(); i++) {
            results.add(0);
        }

        for (int i = 0; i < line.length(); i++) {
            String s = "" + line.charAt(i);
            for (int j = 0; j < vals.size(); j++) {
                if (i < line.length() - 1) {
                    if (vals.get(j).equals(s) && vals.get(j).equalsIgnoreCase("" + line.charAt(i + 1))) {
                        results.set(j, results.get(j) + 1);
                    }
                }

            }

        }

        System.out.println(results);

    }

}

这给了我输出

[3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]