我在 Java 中使用 Map<String, Integer> 构建了一个单词计数器,其中键(String)是单词,值(Integer)是该单词在 .txt 文档中出现的次数。这部分一切正常。
现在我卡在了排除单词的部分:我有一个数组 String[] noWords = [要排除的单词列表],里面是介词和非常常见的单词,例如 “the、and 等等”。
我写不出这样一个循环:遍历单词列表,如果某个单词等于数组中的某个元素,就把它删除。
任何帮助都将不胜感激。
以下是完整运行的代码:
import java.util.Map;
import java.util.Map.Entry;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.Set;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Objects;
import java.util.Collections;
import java.util.stream.Collectors;
import java.io.File;
import java.io.FileNotFoundException;
/**
 * Counts how often each word occurs in a text file and prints a report:
 * every word with its count, the totals, and the most frequent words
 * that are not on the stop-word list.
 */
public class WordCounter {

    /**
     * Words excluded from the "top results" ranking (prepositions and pronouns).
     * Entries are compared case-insensitively against the lower-cased keys
     * produced by {@link #readFile(String)}.
     */
    private static final String[] NO_WORDS = {
        // prepositions
        "aboard", "about", "above", "across", "after", "against", "along", "amid", "among",
        "anti", "around", "as", "at", "before", "behind", "below", "beneath", "beside",
        "besides", "between", "beyond", "but", "by", "concerning", "considering", "despite",
        "down", "during", "except", "excepting", "excluding", "following", "for", "from",
        "in", "inside", "into", "like", "minus", "near", "of", "off", "on", "onto",
        "opposite", "outside", "over", "past", "per", "plus", "regarding", "round", "save",
        "since", "than", "through", "to", "toward", "towards", "under", "underneath",
        "unlike", "until", "up", "upon", "versus", "via", "with", "within", "without",
        // pronouns
        "all", "another", "any", "anybody", "anyone", "anything", "both", "each", "either",
        "everybody", "everyone", "everything", "few", "he", "her", "hers", "herself", "him",
        "himself", "his", "I", "it", "its", "itself", "many", "me", "mine", "more", "most",
        "much", "my", "myself", "neither",
        // "no one" contains a space, so it can never equal a key produced by readFile
        // (which splits on every non-letter); "one" is listed separately below.
        "no one",
        "nobody", "none", "nothing", "one", "other", "others", "our", "ours", "ourselves",
        "several", "she", "some", "somebody", "someone", "something", "that", "their",
        "theirs", "them", "themselves", "these", "they", "this", "those", "us", "we", "what",
        "whatever", "which", "whichever", "who", "whoever", "whom", "whomever", "whose",
        "you", "your", "yours", "yourself", "yourselves"
    };

    /** Maximum number of entries printed in the ranking. */
    private static final int TOP_RESULTS = 5;

    /**
     * Reads {@code filename2}, prints every word with its count, prints the totals,
     * then prints up to five of the most frequent words that are not stop words.
     *
     * @param filename  name shown in the per-word output lines
     * @param filename2 path of the file that is actually opened and counted
     * @throws FileNotFoundException if {@code filename2} cannot be opened
     */
    public static void run(String filename, String filename2) throws FileNotFoundException {
        Map<String, Integer> wordCount = readFile(filename2);

        // Totals are computed before stop words are removed, so they describe the whole file.
        int sum = 0;
        for (int occurrences : wordCount.values()) {
            sum += occurrences;
        }
        int amount = wordCount.size();

        for (Entry<String, Integer> entry : wordCount.entrySet()) {
            System.out.println(entry.getKey() + " was said " + entry.getValue()
                    + " time(s) in " + filename);
        }
        System.out.println("\ntotal amount of words: " + sum);
        System.out.println("total amount of different words: " + amount + "\n");

        // Keys are lower-case, so each stop word is lower-cased before removal;
        // otherwise an entry such as "I" would never match.
        for (String noWord : NO_WORDS) {
            wordCount.remove(noWord.toLowerCase(java.util.Locale.ROOT));
        }

        // The sort is stable, so words with equal counts keep the map's alphabetical order.
        List<Entry<String, Integer>> ranked = new ArrayList<>(wordCount.entrySet());
        Collections.sort(ranked, (left, right) -> right.getValue().compareTo(left.getValue()));

        int limit = Math.min(TOP_RESULTS, ranked.size());
        for (int rank = 1; rank <= limit; rank++) {
            Entry<String, Integer> entry = ranked.get(rank - 1);
            System.out.println(rank + ". \"" + entry.getKey() + "\" was said "
                    + entry.getValue() + " times");
        }
    }

    /**
     * Counts the words in a file. A word is a maximal run of ASCII letters
     * ({@code a-z}, {@code A-Z}); words are lower-cased before counting.
     *
     * @param filename path of the file to read
     * @return a map from lower-cased word to its number of occurrences, sorted by key
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static Map<String, Integer> readFile(String filename) throws FileNotFoundException {
        Map<String, Integer> wordCount = new TreeMap<>();
        // try-with-resources closes the scanner even if reading fails part-way through.
        try (Scanner in = new Scanner(new File(filename))) {
            in.useDelimiter("[^a-zA-Z]+");
            while (in.hasNext()) {
                // Locale.ROOT keeps the casing independent of the default locale.
                String word = in.next().toLowerCase(java.util.Locale.ROOT);
                wordCount.merge(word, 1, Integer::sum);
            }
        }
        return wordCount;
    }
}
它在另一个文件的 main 方法中被调用:
// Caller fragment: prompts for a file name and runs the word counter on it.
// NOTE(review): WordCounter.run is declared static, so this instance is not
// needed to call it; left in place because code outside this excerpt may use it.
WordCounter counter = new WordCounter();
System.out.print("directions:\n" +
"place your text file in the \"textfiles\" folder\n\n" +
"file name: ");
// `s` is declared outside this excerpt — presumably a Scanner on standard input; confirm.
String counterInputFile = s.nextLine();
// The name as typed is used for display; the "textfiles/"-prefixed path is the one opened.
String counterInputFile2 = "textfiles/"+counterInputFile;
try {
counter.run(counterInputFile, counterInputFile2);
} catch(FileNotFoundException e) {
// Reported to the user instead of propagating.
System.out.println("file not found");
}