我有两个文本文件。我需要编写一个Java程序来比较这两个文件,并找出只在其中一个文件中出现的单词(即不同时出现在两个文件中的单词)。我尝试了几种方法,但都没有成功。示例:
test1.txt:
I am a robot. My name is Sofia.
test2.txt:
Hello I am a man. My name is Alex
输出:
Hello robot man Sofia Alex
我的方法是这样的:
import java.io.*;
import java.util.*;
/**
 * Asker's attempt: reads whitespace-separated tokens from test1.txt and
 * test2.txt and tries to print the tokens of the first file that do not
 * occur in the second one.
 *
 * NOTE(review): this is the non-working version from the question; the
 * comments below point out where it goes wrong.
 */
public class Main {
public static void main(String[] args)
throws FileNotFoundException {
Scanner input = new Scanner(new File("test1.txt"));
Scanner scan = new Scanner(new File("test2.txt"));
// al is meant to hold the tokens of test1.txt, a2 those of test2.txt.
ArrayList<String> al = new ArrayList<String>();
ArrayList<String> a2 = new ArrayList<String>();
String test;
while (input.hasNext()) {
// NOTE(review): the token is read but never added to al, so al stays empty.
String next = input.next();
}
System.out.println("arraylist" + al);
while (scan.hasNext()) {
test = scan.next();
a2.add(test);
}
System.out.println("arraylist2" + a2);
// Since al is empty, this outer loop body never executes.
for( int i = 0; i < al.size(); i++){
for(int j = 0; j < a2.size(); j++){
// NOTE(review): both branches break, so each al element is compared
// only with a2.get(0) instead of with every element of a2.
if(al.get(i).equals(a2.get(j))){
break;}
else{
System.out.println(al.get(i));break;
}
}
}
// NOTE(review): tokens that occur only in a2 are never printed; no loop
// uses a2 as the outer list.
}
}
答案 0 :(得分:0)
请注意,这只是一种简单粗糙(quick and dirty)的做法,效率很低。此外,我不清楚您的确切需求(是否要去掉句号?是否区分大小写?)。
另外,该程序不会检查哪个列表更长。不过,它应该能给您提供一个不错的思路:
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Scanner;
/**
 * Prints the words that occur in only one of the files test1.txt and
 * test2.txt.
 *
 * Words are whitespace-separated tokens with every full stop removed. The
 * comparison is case-sensitive and multiplicity-aware: each occurrence in
 * one file cancels at most one occurrence in the other.
 */
public class Main {

    /**
     * Reads both files, cancels out the words they share and prints the rest.
     *
     * @param args unused
     * @throws FileNotFoundException if test1.txt or test2.txt cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        ArrayList<String> list1 = readWords("test1.txt");
        ArrayList<String> list2 = readWords("test2.txt");

        // Walk list 1 backwards so removing by index does not shift the
        // elements that are still to be visited.
        for (int i = list1.size() - 1; i >= 0; i--) {
            // remove(Object) deletes the first equal element of list 2 and
            // reports whether there was one.
            if (list2.remove(list1.get(i))) {
                list1.remove(i);
            }
        }

        // Whatever is left in either list has no partner in the other file.
        list1.addAll(list2);
        System.out.println("Unique Values: " + list1);
    }

    /**
     * Reads the whitespace-separated tokens of a file, stripping full stops
     * BEFORE the lists are compared so that "name." and "name" count as the
     * same word. Tokens that consist only of full stops are skipped.
     *
     * @param fileName path of the file to read
     * @return the normalised words in file order
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static ArrayList<String> readWords(String fileName) throws FileNotFoundException {
        ArrayList<String> words = new ArrayList<String>();
        // try-with-resources closes the Scanner (and the underlying file).
        try (Scanner scanner = new Scanner(new File(fileName))) {
            while (scanner.hasNext()) {
                String word = scanner.next().replace(".", "");
                if (!word.isEmpty()) {
                    words.add(word);
                }
            }
        }
        return words;
    }
}
答案 1 :(得分:0)
假设文本文件仅使用句点(.)作为句子终止符。
/**
 * Demo entry point: normalises two hard-coded texts into word sets and
 * prints the words reported by getUniqueWords.
 *
 * @param args unused
 * @throws Exception declared for compatibility; nothing here throws a checked exception
 */
public static void main(String[] args) throws Exception
{
    // Reading the files is skipped; their contents are hard-coded here.
    final String firstText = "I am a robot. My name is Sofia.";
    final String secondText = "Hello I am a man. My name is Alex";

    Set<String> firstWords = toNormalizedWordSet(firstText);
    Set<String> secondWords = toNormalizedWordSet(secondText);

    // The smaller set is passed as the first argument; on a tie the set of
    // the first text goes first.
    boolean firstIsLarger = firstWords.size() > secondWords.size();
    List<String> uniqueWords = firstIsLarger
            ? getUniqueWords(secondWords, firstWords)
            : getUniqueWords(firstWords, secondWords);

    System.out.println("uniqueWords:" + uniqueWords);
}

/**
 * Splits the text on single spaces, lower-cases every token, removes full
 * stops and collects the tokens into a set.
 */
private static Set<String> toNormalizedWordSet(String text) {
    return Arrays.stream(text.split(" "))
            .map(String::toLowerCase)
            .map(token -> token.replace(".", ""))
            .sorted()
            .collect(Collectors.toSet());
}
/**
 * Returns the non-empty words that occur in exactly one of the two sets
 * (their symmetric difference).
 *
 * Neither argument is modified, so unmodifiable sets are accepted. Empty
 * strings are ignored on both sides.
 *
 * @param removeFromSet  first word set; its exclusive words come last in the result
 * @param iterateOverSet second word set; its exclusive words come first in the result
 * @return a new mutable list of the words found in only one of the sets
 */
private static List<String> getUniqueWords(Set<String> removeFromSet, Set<String> iterateOverSet) {
    // Fully qualified so the block needs no import beyond those already in use.
    List<String> uniqueWords = new java.util.ArrayList<String>();
    for (String word : iterateOverSet) {
        if (!word.isEmpty() && !removeFromSet.contains(word)) {
            uniqueWords.add(word);
        }
    }
    for (String word : removeFromSet) {
        if (!word.isEmpty() && !iterateOverSet.contains(word)) {
            uniqueWords.add(word);
        }
    }
    return uniqueWords;
}
答案 2 :(得分:0)
您可以使用 Guava 库,它提供了计算两个集合之差的方法(Sets.difference)。
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;
import com.google.common.collect.Sets;
/**
 * Compares the word sets of two text files using Guava's Sets.difference.
 */
public class WordTest {

    /**
     * Reads the words of two files and prints both sets along with the
     * difference in each direction.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WordTest wordTest = new WordTest();
        Set<String> firstFileWords = wordTest.getAllWords("E:\\testing1.txt");
        Set<String> secondFileWords = wordTest.getAllWords("E:\\testing2.txt");
        // difference(a, b) holds the elements of a that are not in b, so both
        // directions are needed to see every word unique to one file.
        Set<String> diff = Sets.difference(firstFileWords, secondFileWords);
        Set<String> diff2 = Sets.difference(secondFileWords, firstFileWords);
        System.out.println("Set 1: " + firstFileWords);
        System.out.println("Set 2: " + secondFileWords);
        System.out.println("Difference between " + "Set 1 and Set 2: " + diff);
        System.out.println("Difference between " + "Set 2 and Set 1: " + diff2);
    }

    /**
     * Collects the distinct words of a file. Lines are tokenised on space,
     * comma, full stop, semicolon, colon and double quote.
     *
     * This is best-effort: if the file cannot be opened or read, the stack
     * trace is printed and the words collected so far (possibly none) are
     * returned.
     *
     * @param path path of the file to read
     * @return the set of distinct words found; never null
     */
    public Set<String> getAllWords(String path) {
        Set<String> wordList = new HashSet<>();
        // try-with-resources closes the reader chain; a failure while closing
        // is reported by the catch below instead of being silently dropped.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ,.;:\"");
                while (st.hasMoreTokens()) {
                    wordList.add(st.nextToken());
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            e.printStackTrace();
        }
        return wordList;
    }
}
答案 3 :(得分:0)
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
/**
 * Prints the words that occur in exactly one of the files test1.txt and
 * test2.txt.
 *
 * Words are the pieces obtained by splitting each line on single spaces.
 * Punctuation is kept as part of the word and the comparison is
 * case-sensitive.
 */
public class FileComparision {

    /**
     * Builds one word set per file and prints their symmetric difference.
     * Using a set per file means a word repeated inside one file can no
     * longer cancel itself out.
     *
     * @param args unused
     * @throws IOException if either file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        HashSet<String> firstWords = readWords("test1.txt");
        HashSet<String> secondWords = readWords("test2.txt");

        // Symmetric difference: (first ∪ second) minus (first ∩ second).
        HashSet<String> uniqueSet = new HashSet<String>(firstWords);
        uniqueSet.addAll(secondWords);
        HashSet<String> common = new HashSet<String>(firstWords);
        common.retainAll(secondWords);
        uniqueSet.removeAll(common);

        System.out.println(uniqueSet);
    }

    /**
     * Reads the distinct space-separated words of a file.
     *
     * @param fileName path of the file to read
     * @return the distinct non-empty words of the file
     * @throws IOException if the file cannot be opened or read
     */
    private static HashSet<String> readWords(String fileName) throws IOException {
        HashSet<String> words = new HashSet<String>();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                words.addAll(Arrays.asList(line.split(" ")));
            }
        }
        // Blank lines and repeated spaces yield empty tokens; they are not words.
        words.remove("");
        return words;
    }
}
答案 4 :(得分:-2)
/**
 * Reads a whole file into a string, decoding the bytes with the platform
 * default charset.
 *
 * @param fileName path of the file to read
 * @return the complete file content
 * @throws Exception if the file cannot be read
 */
public static String readFile(String fileName)throws Exception
{
    byte[] rawContent = Files.readAllBytes(Paths.get(fileName));
    return new String(rawContent);
}
/**
 * Prints, one per line, the words that occur in only one of the two text
 * files: first those found only in the second file, then those found only
 * in the first.
 *
 * Words are the pieces obtained by splitting the file content on single
 * spaces. They are compared as whole tokens, so a short word such as "a" is
 * not treated as present merely because it is a substring of "man".
 *
 * @param args unused
 * @throws Exception if either file cannot be read
 */
public static void main(String[] args) throws Exception
{
    // The helper defined alongside this method is named readFile.
    String data = readFile("C:\\Users\\pb\\Desktop\\text1.txt");
    String data1 = readFile("C:\\Users\\pb\\Desktop\\text2.txt");
    String[] array = data.split(" ");
    String[] array1 = data1.split(" ");

    // List views over the arrays give whole-token membership tests. They are
    // fully qualified so the block needs no additional import.
    java.util.List<String> words = java.util.Arrays.asList(array);
    java.util.List<String> words1 = java.util.Arrays.asList(array1);

    for (String word : array1) {
        if (!words.contains(word)) {
            System.out.println(word);
        }
    }
    for (String word : array) {
        if (!words1.contains(word)) {
            System.out.println(word);
        }
    }
}