Java,文本中的n个单词数

时间:2013-08-20 09:29:14

标签: java string word

此类的目的是计算用户输入的文本中每个n字符单词的出现次数。我认为在methodsNumber方法中有一些错误,因为我为每个n字符单词获得228。错误在哪里?

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Scanner;

public class TempTest {
    public static void main(String[] args) {

        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        System.out.println("Enter the integer n");
        Scanner sc = new Scanner(System.in);
        int n = sc.nextInt();
        char[] array = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ' };
        StringBuffer[] table = createTotalTable(n, array);

        System.out.println("Enter the text ");
        StringBuffer text = new StringBuffer("");
        try {
            text = new StringBuffer(in.readLine());
        } catch (IOException a) {
            System.out.println("Input-Output problem");
        }
        StringBuffer text_formatted = format(text.toString());
        System.out.println("The formatted text is \n" + text_formatted.toString() + "\n");

        System.out.println("Now we print all the n-character words in alphabetic order. Press enter to proceed. ");
        try {
            in.readLine();
        } catch (IOException b) {
            System.out.println("Input-Output problem");
        }
        for (StringBuffer word : table)
            System.out.println(word.toString());

        int[] occurrenceTable = createList(text_formatted, table, n);

        System.out.println("Now we print all the n-words contained in the text with the number of occurrences. Press enter to proceed.");
        try {
            in.readLine();
        } catch (IOException c) {
            System.out.println("Input-Output problem");
        }

        for (int u = 0; u < pow(27, n); u++)
            System.out.println(table[u].toString() + ", " + occurrenceTable[u]);

    }

    public static StringBuffer[] createTotalTable(int n, char[] a) { // this method create an array containing all the n-words in alphabetic order

        StringBuffer[] table = new StringBuffer[pow(27, n)];
        for (int w = 0; w < pow(27, n); w++)
            table[w] = new StringBuffer("");

        for (int h = 1; h <= n; h++) {
            for (int u = 0; u < pow(27, h - 1); u++) {

                for (int j = 0; j < 27; j++) {

                    for (int x = pow(27, n - h + 1) * u + pow(27, n - h) * j; x < pow(27, n - h + 1) * u + pow(27, n - h) * (j + 1); x++)
                        table[x] = table[x].append(a[j]);
                }

            }

        }

        return table;
    }

    public static int pow(int a, int b) { // the method Math.pow modified

        int tot = 1;
        for (int i = 0; i < b; i++)
            tot = a * tot;

        return tot;
    }

    public static int occurrenceNumber(StringBuffer testo, StringBuffer parola, int n) { // this method is aimed to calculate the number of occurrences of a
                                                                                            // word of length n in a text
        int tot = 0;

        if (n > testo.length())
            System.out.println("The integer is bigger than the text's length ");
        else {
            for (int i = 0; i <= testo.length() - n; i++) {
                if (testo.substring(i, i + n) == parola.toString())
                    tot += 1;

            }

        }

        return tot;
    }

    public static int[] createList(StringBuffer str, StringBuffer[] tabella, int n) { // this method is aimed to create an array containing for every position
                                                                                        // the number of occurrences of the corresponding word in the text

        int[] occurrenceTable = new int[pow(27, n)];

        for (int i = 0; i < pow(27, n); i++)
            occurrenceTable[i] = occurrenceNumber(str, tabella[i], n);

        return occurrenceTable;

    }

    public static StringBuffer format(String s) { // this method is aimed to
        // eliminate from the text all non-alphabetic characters and multiple spaces

        s = s.toLowerCase();
        StringBuffer b = new StringBuffer();
        int m = s.length();
        int conta_spazi = 0;
        StringBuffer h = new StringBuffer(s);
        for (int i = 0; i < m; i++) {
            switch (h.charAt(i)) {
            case 'a':
                break;

            case 'A':
                break;

            case 'b':
                break;

            case 'B':
                break;

            case 'c':
                break;

            case 'C':
                break;

            case 'd':
                break;

            case 'D':
                break;

            case 'e':
                break;

            case 'E':
                break;

            case 'f':
                break;

            case 'F':
                break;

            case 'g':
                break;

            case 'G':
                break;

            case 'h':
                break;

            case 'H':
                break;

            case 'i':
                break;

            case 'I':
                break;

            case 'j':
                break;

            case 'J':
                break;

            case 'k':
                break;

            case 'K':
                break;

            case 'l':
                break;

            case 'L':
                break;

            case 'm':
                break;

            case 'M':
                break;

            case 'n':
                break;

            case 'N':
                break;

            case 'o':
                break;

            case 'O':
                break;

            case 'p':
                break;

            case 'P':
                break;

            case 'q':
                break;

            case 'Q':
                break;

            case 'r':
                break;

            case 'R':
                break;

            case 's':
                break;

            case 'S':
                break;

            case 't':
                break;

            case 'T':
                break;

            case 'u':
                break;

            case 'U':
                break;

            case 'v':
                break;

            case 'V':
                break;

            case 'w':
                break;

            case 'W':
                break;

            case 'x':
                break;

            case 'X':
                break;

            case 'y':
                break;

            case 'Y':
                break;

            case 'z':
                break;

            case 'Z':
                break;

            default:
                h.setCharAt(i, ' ');

            }
        }
        for (int i = 0; i < m; i++) {

            if (h.charAt(i) == ' ')
                conta_spazi++;
            else
                conta_spazi = 0;

            if (conta_spazi <= 1)
                b = b.append(h.charAt(i));

        }

        return b;

    }
}

2 个答案:

答案 0 :(得分:2)

我无法弄清楚你正在实施的逻辑。但是,您可以通过更简单的方法找到计数:

    // extract n-char words and add them to a list
    ArrayList<String> arr = new ArrayList<String>();
    Pattern pattern = Pattern.compile("(^|\\s)\\w{" + n + "}(\\s|$)");
    Matcher matcher = pattern.matcher(text_formatted);
    while (matcher.find()) {
        int k = 0;
        arr.add(matcher.group(k++).trim());
    }
    // sort the list 
    Collections.sort(arr);
    // find the count and print
    String prev = "";
    int count = 1;
    for (int i = 0; i < arr.size(); i++) {
        if (prev.equals(arr.get(i))) {
            count++;
        } else {
            count = 1;
            System.out.print(arr.get(i)+" : ");
        }
        if(i<arr.size()-1 && !arr.get(i).equals(arr.get(i+1)))
        {
            System.out.println(count);
        }
        prev = arr.get(i);
    }
    System.out.println(count);

希望这会有所帮助。

答案 1 :(得分:0)

要比较2个字符串,您需要调用equals方法,而不是==

你应该替换:

 if(testo.substring(i,i+n) == parola.toString())

。通过

if(testo.substring(i,i+n).equals(parola.toString()))