比较两个文件并删除重复的部分以仅合并新内容

时间:2015-12-11 15:07:21

标签: java

我有两个.ckl文件,我试图从中读取,比较,然后只将新信息附加到最后并生成一个新的.ckl。逻辑某处存在问题,因为它只是合并两个文件而不删除任何重复部分。

以下是我的代码......

package hellockl;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;


public class HelloCKL {

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException {
        // TODO code application logic here
        String sourceFile1Path = "test.ckl";
        String sourceFile2Path = "test2.ckl";

        String mergedFilePath = "merged_test.ckl";

        // NEW
    //    System.out.println("here1");
        ArrayList<String> list = (ArrayList<String>) makeVulnList(sourceFile1Path);
    //    System.out.println("here2");

        File[] files = new File[2];
        files[0] = new File(sourceFile1Path);
        files[1] = new File(sourceFile2Path);

        File mergedFile = new File(mergedFilePath);

        mergeFiles(files, mergedFile, list);
    }

    public static void mergeFiles(File[] files, File mergedFile, ArrayList<String> list) {

        FileWriter fstream = null;
        BufferedWriter out = null;
        try {
            fstream = new FileWriter(mergedFile, true);
            out = new BufferedWriter(fstream);
        } catch (IOException e1) {
            e1.printStackTrace();
        }

        for (File f : files) {
            System.out.println("merging: " + f.getName());
            FileInputStream fis;
            try {
                fis = new FileInputStream(f);
                BufferedReader in = new BufferedReader(new InputStreamReader(fis));

                String aLine;
                while ((aLine = in.readLine()) != null) {
                    // NEW
                    if (aLine.equals("<VULN>")) {
                        // save the lines from here til ATTRIBUTE_DATA
                        aLine += in.readLine();
                        aLine += in.readLine();
                        // grab the line that would have the name
                        String nameLine = in.readLine();
                        if (list.contains(nameLine)) {
                            // need to advance the reader past the end of this VULN
                            while (!(aLine.equals("</VULN>"))) {
                                aLine = in.readLine();
                            }
                            continue; // this would skip the writing out to file part
                        }
                        aLine += nameLine; // concat this and go on as usual
                    }
                    // END NEW
                    out.write(aLine);
                    out.newLine();
                }

                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        try {
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    // This should build a list of lines that have the vulnerability names. It lookes for a 
    // <VULN> tag using a Scanner and saves the <ATTRIBUTE_DATA>V - 3</ATTRIBUTE_DATA> line
    // into an arraylist. The we can use that list to compare other lines to to see if 
    // it already exists. 
    public static ArrayList<String> makeVulnList(String sourceFile1Path) {
    //    System.out.println("IN MAKE VULN LIST");
        ArrayList<String> list = new ArrayList<String>();
        Scanner scanner = new Scanner(sourceFile1Path);
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            System.out.println("on line: " + line);
            if (line.equals("<VULN>")) {
                System.out.println("match!!! : " + line);
                line = scanner.nextLine();
                line = scanner.nextLine();
                line = scanner.nextLine();
                list.add(line);
                System.out.println("adding to list : " + line);
            }
        }
        return list;
    }
}

.ckl不是最短的东西,但如果它有帮助我也可以附上它。

谢谢。

1 个答案:

答案 0 :(得分:1)

我建议您阅读这两个文件中的所有行并将其添加到Set(A Set不允许重复项目)。 然后迭代Set将其项目添加到合并文件。