在Java中使用链表实现快速单词计数

时间:2018-09-23 18:20:10

标签: java

下面是一个用于找出文本文档中出现频率最高的单词的程序。但我猜是由于使用了 get(i) 方法,它运行得非常慢。有什么办法可以让它更快吗?我知道改用数组会更快,但我希望继续使用链表,如果可能的话,只修改 get(i) 那一部分。

import java.io.File;
import java.util.AbstractMap;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Scanner;

/**
 * Finds the most frequent word in a text document, tallying occurrences in a
 * linked list of (word, count) entries.
 */
public class wordcount {

    /** Any run of characters that are not ASCII letters separates two words. */
    private static final String WORD_DELIMITER = "[^a-zA-Z]+";

    /** Document read by {@link #main} when no path is given on the command line. */
    private static final String DEFAULT_PATH = "/Users/username/foldername/textdocument.txt";

    /**
     * Returns the word that occurs most often in {@code tokens} together with
     * its number of occurrences.
     *
     * <p>When several words share the highest count, the one that appears
     * first in {@code tokens} is returned.
     *
     * @param tokens the words to tally
     * @return an entry whose key is the most frequent word and whose value is
     *         its count; the entry {@code ("", 0)} when {@code tokens} is empty
     */
    public static Entry<String, Integer> count_LINKED_LIST(String[] tokens) {
        LinkedList<Entry<String, Integer>> list = new LinkedList<>();
        for (String word : tokens) {
            boolean found = false;
            // Walk the list through its iterator: each step is a single link
            // hop, whereas get(i) has to re-walk the list from one end on
            // every call.
            for (Entry<String, Integer> e : list) {
                if (word.equals(e.getKey())) {
                    // The entry is mutated in place, so it does not need to
                    // be stored back into the list.
                    e.setValue(e.getValue() + 1);
                    found = true;
                    break;
                }
            }
            if (!found) {
                list.add(new AbstractMap.SimpleEntry<>(word, 1));
            }
        }

        int maxCount = 0;
        String maxWord = "";
        for (Entry<String, Integer> e : list) {
            int count = e.getValue();
            // Strict comparison keeps the earliest word when counts tie.
            if (count > maxCount) {
                maxWord = e.getKey();
                maxCount = count;
            }
        }
        return new AbstractMap.SimpleEntry<>(maxWord, maxCount);
    }

    /**
     * Reads the file at {@code PATH} and splits it into lower-cased words,
     * where a word is a maximal run of ASCII letters.
     *
     * @param PATH path of the text file to read
     * @return the words of the file, lower-cased, in document order
     * @throws Exception if the file cannot be opened or a read error occurs
     */
    static String[] readText(String PATH) throws Exception {
        LinkedList<String> words = new LinkedList<>();
        // A single pass collects the words; try-with-resources guarantees the
        // file handle is released even when reading fails.
        try (Scanner scanner = new Scanner(new File(PATH)).useDelimiter(WORD_DELIMITER)) {
            while (scanner.hasNext()) {
                // Locale.ROOT keeps the lower-casing independent of the
                // platform's default locale.
                words.add(scanner.next().toLowerCase(Locale.ROOT));
            }
            // Scanner treats a read error as end of input; surface it rather
            // than silently returning a truncated word list.
            if (scanner.ioException() != null) {
                throw scanner.ioException();
            }
        }
        return words.toArray(new String[0]);
    }

    /**
     * Reads a document, times the word count, and prints the elapsed
     * milliseconds followed by the most frequent word and its count.
     *
     * @param args optional; {@code args[0]} is the path of the document to
     *             read, otherwise the built-in default path is used
     * @throws Exception if the document cannot be read
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        String[] tokens = readText(path);
        long startTime = System.currentTimeMillis();
        Entry<String, Integer> entry = count_LINKED_LIST(tokens);
        long endTime = System.currentTimeMillis();
        String time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());
    }

}

1 个答案:

答案 0 :(得分:0)

您可以为此使用 Map(以单词为键,以该单词的出现次数为值),并结合 Java 8+ 的 Stream API:

/**
 * Tallies how many times each distinct token occurs.
 *
 * @param tokens the words to count
 * @return a map from each distinct token to its number of occurrences in
 *         {@code tokens}; empty when {@code tokens} is empty
 */
public static Map<String, Long> count(String[] tokens) {
    // Group equal tokens together and reduce every group to its size.
    return Arrays.stream(tokens)
            .collect(Collectors.groupingBy(token -> token, Collectors.counting()));
}