Java程序 - 单词统计

时间:2012-04-23 08:45:48

标签: java count statistics word keyword

我对Java还很陌生,所以请多多包涵。

我创建了一个Java程序,它根据关键字文本文件从各种社交媒体API返回搜索结果,并将返回的结果写入另一个文本文件。

我现在正在尝试使用另一个单独的类来扫描搜索结果文本文件,并返回关于“关键字”在搜索结果中出现次数的统计信息。我基本上是想对照keywords.txt文件中的关键字来搜索结果文件。

我一直试图让下面的代码工作,但我只能让它返回所有单词,而不仅仅是那些来自keywords.txt文件的单词。

我试过用InputStreamReader来实现,但没有成功。任何帮助都将不胜感激。


package search;

import java.util.*; // Provides TreeMap, Iterator, Scanner
import java.io.*; // Provides FileReader, FileNotFoundException

/**
 * Counts how often each token occurs in {@code SearchResults.txt} and prints
 * the totals as a table ordered by word (the counts live in a TreeMap).
 */
public class SearchResults
{

    /**
     * Entry point: builds the frequency table from the results file and prints it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        TreeMap<String, Integer> frequencyData = new TreeMap<String, Integer>();
        readWordFile(frequencyData);
        printAllCounts(frequencyData);
    }

    /**
     * Looks up how many times a word has been recorded so far.
     *
     * @param word          the word to look up
     * @param frequencyData map from word to its occurrence count
     * @return the recorded count, or 0 if the word has no recorded count
     */
    public static int getCount(String word, TreeMap<String, Integer> frequencyData)
    {
        // A single lookup replaces containsKey + get; a missing entry yields null.
        Integer recorded = frequencyData.get(word);
        return (recorded == null) ? 0 : recorded.intValue();
    }

    /**
     * Prints every word with its occurrence count to standard output,
     * framed by separator lines.
     *
     * @param frequencyData map from word to its occurrence count
     */
    public static void printAllCounts(TreeMap<String, Integer> frequencyData)
    {
        System.out.println("-----------------------------------------------");
        System.out.println(" Occurrences Word");

        // Iterating entries avoids a second map lookup per word.
        for (Map.Entry<String, Integer> entry : frequencyData.entrySet())
        {
            System.out.printf("%15d %s\n", entry.getValue(), entry.getKey());
        }

        System.out.println("-----------------------------------------------");
    }

    /**
     * Reads {@code SearchResults.txt} token by token and increments the count
     * of each token in the supplied map. If the file cannot be opened, the
     * exception is printed to standard error and the map is left untouched.
     *
     * @param frequencyData map from word to its occurrence count; updated in place
     */
    public static void readWordFile(TreeMap<String, Integer> frequencyData)
    {
        Scanner wordFile;

        try
        {
            wordFile = new Scanner(new FileReader("SearchResults.txt"));
        }
        catch (FileNotFoundException e)
        {
            System.err.println(e);
            return;
        }

        try
        {
            while (wordFile.hasNext())
            {
                // Read the next token, then store its previous count plus one.
                String word = wordFile.next();
                frequencyData.put(word, getCount(word, frequencyData) + 1);
            }
        }
        finally
        {
            // Closing the Scanner also closes the underlying FileReader,
            // releasing the file handle even if reading fails part-way.
            wordFile.close();
        }
    }
}

1 个答案:

答案 0 :(得分:1)

给你,我整理了你的代码,并添加了过滤功能:只统计在keywords.txt中出现的关键字。

/**
 * Entry point: loads the keyword set from keywords.txt, builds the count
 * table from SearchResults.txt restricted to those keywords, and prints it.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if either file cannot be opened
 */
public static void main(String[] args) throws Exception {
  final Set<String> keywords = loadKeywords("keywords.txt");
  final Map<String, Integer> counts = readWordFile("SearchResults.txt", keywords);
  printAllCounts(counts);
}

/**
 * Counts the occurrences of each keyword in the given file.
 * Tokens that are not in {@code keywords} are ignored.
 *
 * @param fname    path of the file to scan
 * @param keywords the only words that are counted
 * @return a map, ordered by word, from each keyword found to its occurrence count
 * @throws FileNotFoundException if {@code fname} cannot be opened
 */
private static Map<String, Integer> readWordFile(
  String fname, Set<String> keywords) throws FileNotFoundException
{
  final Map<String, Integer> frequencyData = new TreeMap<String, Integer>();
  final Scanner wordFile = new Scanner(new FileReader(fname));
  try {
    while (wordFile.hasNext()) {
      final String word = wordFile.next();
      if (keywords.contains(word)) {
        frequencyData.put(word, getCount(word, frequencyData) + 1);
      }
    }
  } finally {
    // Closing the Scanner also closes the FileReader; the original leaked it.
    wordFile.close();
  }
  return frequencyData;
}

/**
 * Writes the count table to standard output: a separator line, a header,
 * one right-aligned count and word per entry, and a closing separator.
 *
 * @param frequencyData map from word to its occurrence count
 */
private static void printAllCounts(Map<String, Integer> frequencyData) {
  final String separator = "-----------------------------------------------";
  System.out.println(separator);
  System.out.println(" Occurrences Word");
  final Iterator<Map.Entry<String, Integer>> rows = frequencyData.entrySet().iterator();
  while (rows.hasNext()) {
    final Map.Entry<String, Integer> row = rows.next();
    final Integer occurrences = row.getValue();
    final String word = row.getKey();
    System.out.printf("%15d %s\n", occurrences, word);
  }
  System.out.println(separator);
}

/**
 * Returns the count currently stored for a word.
 *
 * @param word          the word to look up
 * @param frequencyData map from word to its occurrence count
 * @return the stored count, or 0 when the map has no entry for the word
 */
private static int getCount(String word, Map<String, Integer> frequencyData) {
    // Guard clause: a word with no entry has not been seen yet.
    if (!frequencyData.containsKey(word)) {
        return 0;
    }
    return frequencyData.get(word); // auto-unboxed
}

/**
 * Reads every token of the given file into a set of keywords.
 *
 * @param fname path of the keyword file
 * @return the distinct tokens found in the file
 * @throws FileNotFoundException if {@code fname} cannot be opened
 */
private static Set<String> loadKeywords(String fname)
throws FileNotFoundException
{
  final Set<String> result = new HashSet<String>();
  final Scanner s = new Scanner(new FileReader(fname));
  try {
    while (s.hasNext()) {
      result.add(s.next());
    }
  } finally {
    // Closing the Scanner also closes the FileReader; the original leaked it.
    s.close();
  }
  return result;
}