我正在编写通过main方法中的命令行参数读取文本文件的代码,并在其自己的行中打印出每个单词,而不会多次打印任何单词,它不会打印任何内容,任何人都可以帮助?
import java.util.*;
import java.io.*;
public class Tokenization {
public static void main(String[] args) throws Exception{
String x = "";
String y = "";
File file = new File(args[0]);
Scanner s = new Scanner(file);
String [] words = null;
while (s.hasNext()){
x = s.nextLine();
}
words = x.split("\\p{Punct}");
String [] moreWords = null;
for (int i = 0; i < words.length;i++){
y = y + " " + words[i];
}
moreWords = y.split("\\s+");
String [] unique = unique(moreWords);
for (int i = 0;i<unique.length;i++){
System.out.println(unique[i]);
}
s.close();
}
public static String[] unique (String [] s) {
String [] uniques = new String[s.length];
for (int i = 0; i < s.length;i++){
for(int j = i + 1; j < s.length;j++){
if (!s[i].equalsIgnoreCase(s[j])){
uniques[i] = s[i];
}
}
}
return uniques;
}
}
答案 0 :(得分:0)
你有几个问题:
以下是您需要的缩短版本:
import java.io.File;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
public class Tokenization {
public static void main(String[] args) throws Exception {
Set<String> words = new HashSet<String>();
try {
File file = new File(args[0]);
Scanner scanner = new Scanner(file);
while (scanner.hasNext()) {
String[] lineWords = scanner.nextLine().split("[\\p{Punct}\\s]+");
for (String s : lineWords)
words.add(s.toLowerCase());
}
scanner.close();
} catch (Exception e) {
System.out.println("Cannot read file [" + e.getMessage() + "]");
System.exit(1);
}
for (String s : words)
System.out.println(s);
}
}