比较两个文本文件并在Java中显示唯一的单词

时间:2019-05-15 04:47:10

标签: java

我有两个文本文件。我必须开发一个Java程序来比较两个文件并找到唯一的单词。我尝试了几种方法,但是没有用。示例:

test1.txt:

I am a robot. My name is Sofia.

test2.txt:

Hello  I am a man. My name is Alex

输出:

Hello robot man Sofia Alex

我的方法是这样的:

import java.io.*;
import java.util.*;

/**
 * Compares the words of {@code test1.txt} and {@code test2.txt} and prints
 * every word that occurs in only one of the two files.
 */
public class Main {
    /**
     * Reads both files, prints the two word lists, then prints each unique
     * word on its own line (words unique to the first file first, followed by
     * words unique to the second file, each in order of first appearance).
     *
     * @param args unused
     * @throws FileNotFoundException if either input file cannot be opened
     */
    public static void main(String[] args)
            throws FileNotFoundException {
        List<String> al = readWords("test1.txt");
        List<String> a2 = readWords("test2.txt");
        System.out.println("arraylist" + al);
        System.out.println("arraylist2" + a2);

        // Hash sets make the membership checks cheap.
        Set<String> firstWords = new HashSet<String>(al);
        Set<String> secondWords = new HashSet<String>(a2);

        // LinkedHashSet reports each unique word once, in encounter order.
        Set<String> unique = new LinkedHashSet<String>();
        for (String word : al) {
            if (!secondWords.contains(word)) {
                unique.add(word);
            }
        }
        for (String word : a2) {
            if (!firstWords.contains(word)) {
                unique.add(word);
            }
        }

        for (String word : unique) {
            System.out.println(word);
        }
    }

    /**
     * Reads all whitespace-separated tokens of a file, with full stops removed
     * so that "robot." and "robot" count as the same word.
     *
     * @param fileName path of the file to read
     * @return the words in file order; tokens that are empty after removing
     *         full stops are skipped
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static List<String> readWords(String fileName)
            throws FileNotFoundException {
        List<String> words = new ArrayList<String>();
        // try-with-resources closes the scanner (and the file) in every case.
        try (Scanner scanner = new Scanner(new File(fileName))) {
            while (scanner.hasNext()) {
                String word = scanner.next().replace(".", "");
                if (!word.isEmpty()) {
                    words.add(word);
                }
            }
        }
        return words;
    }
}

5 个答案:

答案 0 :(得分:0)

请注意,这是一种快速但粗糙(quick and dirty)的做法,效率很低。此外,我不清楚您的确切要求(是否保留句号?是否区分大小写?)。

还要考虑到该程序不会检查哪个列表更长。但这应该给您一个很好的提示:

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Scanner;

/**
 * Prints the words that remain after pairing off identical words between
 * {@code test1.txt} and {@code test2.txt}.
 */
public class Main {

    /**
     * Reads both files, cancels out matching words one occurrence at a time,
     * and prints whatever is left over from either file.
     *
     * @param args unused
     * @throws FileNotFoundException if either input file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        ArrayList<String> list1 = readWords("test1.txt");
        ArrayList<String> list2 = readWords("test2.txt");

        // Walk list 1 backwards so removing by index does not shift the
        // elements that are still to be visited.
        for (int i = list1.size() - 1; i >= 0; i--) {
            // remove(Object) drops the first matching occurrence from list 2
            // and reports whether there was one.
            if (list2.remove(list1.get(i))) {
                list1.remove(i);
            }
        }

        // merge the leftovers of both lists
        list1.addAll(list2);

        System.out.println("Unique Values: " + list1);
    }

    /**
     * Reads all whitespace-separated tokens of a file. Full stops are removed
     * while reading, before any comparison, so that "robot." and "robot" are
     * treated as the same word.
     *
     * @param fileName path of the file to read
     * @return the words in file order; tokens that are empty after removing
     *         full stops are skipped
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static ArrayList<String> readWords(String fileName) throws FileNotFoundException {
        ArrayList<String> words = new ArrayList<String>();
        // try-with-resources closes the scanner (and the file) in every case.
        try (Scanner scanner = new Scanner(new File(fileName))) {
            while (scanner.hasNext()) {
                String word = scanner.next().replace(".", "");
                if (!word.isEmpty()) {
                    words.add(word);
                }
            }
        }
        return words;
    }

}

答案 1 :(得分:0)

这里假设文本文件仅使用句点(.)作为句子终止符。

    /**
     * Demonstrates finding the words that occur in only one of two texts.
     * Reading from files is skipped; the two file contents are hard-coded.
     * Words are compared case-insensitively and with full stops removed.
     *
     * @param args unused
     * @throws Exception declared by the original signature; nothing in this
     *                   method throws a checked exception
     */
    public static void main(String[] args) throws Exception {

        // Skipping reading from file and storing in string

        String stringFromFileOne = "I am a robot. My name is Sofia.";
        String stringFromFileTwo = "Hello  I am a man. My name is Alex";

        Set<String> set1 = toWordSet(stringFromFileOne);
        Set<String> set2 = toWordSet(stringFromFileTwo);

        List<String> uniqueWords;

        // The smaller set is passed as the first argument, the larger one as
        // the second.
        if (set1.size() > set2.size()) {
            uniqueWords = getUniqueWords(set2, set1);
        } else {
            uniqueWords = getUniqueWords(set1, set2);
        }
        System.out.println("uniqueWords:" + uniqueWords);
    }

    /**
     * Splits a text into its distinct lower-cased words with full stops removed.
     * Splitting on runs of whitespace (instead of a single space) and dropping
     * empty tokens keeps "" out of the result when the text contains
     * consecutive spaces.
     */
    private static Set<String> toWordSet(String text) {
        return Arrays.stream(text.split("\\s+"))
                .map(String::toLowerCase)
                .map(word -> word.replace(".", ""))
                .filter(word -> !word.isEmpty())
                .collect(Collectors.toSet());
    }

    /**
     * Returns the words contained in exactly one of the two sets.
     * Neither argument is modified, and empty strings are ignored on both sides.
     *
     * @param removeFromSet  first set of words; its exclusive words come last in the result
     * @param iterateOverSet second set of words; its exclusive words come first in the result
     * @return a new mutable list holding the words of {@code iterateOverSet} that are
     *         missing from {@code removeFromSet}, followed by the words of
     *         {@code removeFromSet} that are missing from {@code iterateOverSet}
     */
    private static List<String> getUniqueWords(Set<String> removeFromSet, Set<String> iterateOverSet) {
        // Fully qualified because ArrayList is not otherwise referenced in this snippet.
        List<String> uniqueWords = new java.util.ArrayList<>();
        for (String word : iterateOverSet) {
            if (!word.isEmpty() && !removeFromSet.contains(word)) {
                uniqueWords.add(word);
            }
        }
        for (String word : removeFromSet) {
            if (!word.isEmpty() && !iterateOverSet.contains(word)) {
                uniqueWords.add(word);
            }
        }
        return uniqueWords;
    }

答案 2 :(得分:0)

您可以使用 Guava 库,它提供了计算两个集合(Set)之间差集的方法。

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;

import com.google.common.collect.Sets;

/**
 * Prints the words of two text files and the set difference in each direction,
 * using Guava's {@code Sets.difference}.
 */
public class WordTest {

    /**
     * Loads the word sets of the two hard-coded files and prints both sets
     * followed by the words exclusive to each of them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WordTest wordTest = new WordTest();

        Set<String> firstFileWords = wordTest.getAllWords("E:\\testing1.txt");
        Set<String> secondFileWords = wordTest.getAllWords("E:\\testing2.txt");
        Set<String> diff = Sets.difference(firstFileWords, secondFileWords);
        Set<String> diff2 = Sets.difference(secondFileWords, firstFileWords);

        System.out.println("Set 1: " + firstFileWords);
        System.out.println("Set 2: " + secondFileWords);
        System.out.println("Difference between " + "Set 1 and Set 2: " + diff);
        System.out.println("Difference between " + "Set 2 and Set 1: " + diff2);
    }

    /**
     * Collects the distinct words of a text file. Each line is tokenized on
     * space, comma, full stop, semicolon, colon and double quote.
     *
     * <p>I/O problems are reported with a stack trace and do not abort the
     * program; the words read up to that point (possibly none) are returned.
     *
     * @param path path of the file to read
     * @return the distinct words found in the file, never {@code null}
     */
    public Set<String> getAllWords(String path) {
        Set<String> wordList = new HashSet<>();
        // try-with-resources closes the reader and the underlying stream even
        // when reading fails; a failure while closing is reported by the
        // IOException handler below instead of being silently dropped.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path)))) {
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " ,.;:\"");
                while (st.hasMoreTokens()) {
                    wordList.add(st.nextToken());
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return wordList;
    }

}

答案 3 :(得分:0)

import java.io.BufferedReader;

import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;

/**
 * Prints the words that occur in exactly one of {@code test1.txt} and
 * {@code test2.txt}.
 */
public class FileComparision {

    /**
     * Reads the distinct words of both files and prints the set of words
     * found in only one of them. Punctuation is not stripped, so "robot."
     * and "robot" are different words.
     *
     * @param args unused
     * @throws IOException if either file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        HashSet<String> firstWords = readWords("test1.txt");
        HashSet<String> secondWords = readWords("test2.txt");

        // Symmetric difference: everything from both files minus the words
        // they share. Working on whole sets means a word repeated inside one
        // file cannot flip the result.
        HashSet<String> sharedWords = new HashSet<String>(firstWords);
        sharedWords.retainAll(secondWords);

        HashSet<String> uniqueSet = new HashSet<String>(firstWords);
        uniqueSet.addAll(secondWords);
        uniqueSet.removeAll(sharedWords);

        System.out.println(uniqueSet);
    }

    /**
     * Reads the distinct whitespace-separated words of a file.
     *
     * @param fileName path of the file to read
     * @return the distinct non-empty words of the file
     * @throws IOException if the file cannot be opened or read
     */
    private static HashSet<String> readWords(String fileName) throws IOException {
        HashSet<String> words = new HashSet<String>();
        // try-with-resources closes the reader even if reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Split on runs of whitespace; a leading run (or a blank
                // line) still yields one empty token, which is skipped.
                for (String word : line.split("\\s+")) {
                    if (!word.isEmpty()) {
                        words.add(word);
                    }
                }
            }
        }
        return words;
    }
}

答案 4 :(得分:-2)

 /**
  * Loads the complete content of a file into a single string.
  * The bytes are decoded with the JVM's default charset.
  *
  * @param fileName path of the file to read
  * @return the file content as text
  * @throws Exception if the file cannot be read
  */
 public static String readFile(String fileName) throws Exception
      {
        byte[] rawContent = Files.readAllBytes(Paths.get(fileName));
        return new String(rawContent);
      }

      /**
       * Prints the words that occur in only one of the two hard-coded text
       * files: first the words found only in the second file, then the words
       * found only in the first file, one per line and in file order.
       *
       * @param args unused
       * @throws Exception if either file cannot be read
       */
      public static void main(String[] args) throws Exception
      {
        // readFile is the helper this snippet defines; the previous call to
        // readFileAsString referred to a method that does not exist.
        String data = readFile("C:\\Users\\pb\\Desktop\\text1.txt");
        String data1 = readFile("C:\\Users\\pb\\Desktop\\text2.txt");
        // Split on runs of whitespace so line breaks also separate words.
        String array[] = data.split("\\s+");
        String array1[] = data1.split("\\s+");

        // Whole-word lookup sets: a substring test on the raw text would
        // treat "man" as present whenever the other file contains "woman".
        // Fully qualified because this snippet shows no import block.
        java.util.Set<String> words = new java.util.HashSet<>(java.util.Arrays.asList(array));
        java.util.Set<String> words1 = new java.util.HashSet<>(java.util.Arrays.asList(array1));

        // Words of the second file that the first file does not contain.
        for (String word : array1) {
            if (!word.isEmpty() && !words.contains(word)) {
                System.out.println(word);
            }
        }

        // Words of the first file that the second file does not contain.
        for (String word : array) {
            if (!word.isEmpty() && !words1.contains(word)) {
                System.out.println(word);
            }
        }
      }