将CSV文件合并到单个文件中,没有重复的标题

时间:2013-08-02 15:10:47

标签: java csv

我有一些具有相同列标题的CSV文件。例如

档案A

header1,header2,header3
one,two,three
four,five,six

档案B

header1,header2,header3
seven,eight,nine
ten,eleven,twelve

我想把这些文件合并成一个文件:标题行只在顶部出现一次,其他位置不再重复标题。

header1,header2,header3
one,two,three
four,five,six
seven,eight,nine
ten,eleven,twelve

实现这一目标的好方法是什么?

5 个答案:

答案 0 :(得分:4)

以下是一个例子:

/**
 * Demo entry point: merges two fixed CSV files into {@code c:/temp/merged.csv},
 * written as UTF-8.
 *
 * @param args unused
 * @throws IOException if an input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws IOException {
    Path first = Paths.get("c:/temp/file1.csv");
    Path second = Paths.get("c:/temp/file2.csv");
    Path destination = Paths.get("c:/temp/merged.csv");

    // Inputs are merged in the order given; the header comes from the first non-empty one.
    List<String> combined = getMergedLines(Arrays.asList(first, second));
    Files.write(destination, combined, Charset.forName("UTF-8"));
}

/**
 * Reads every file as UTF-8 and concatenates their lines, keeping the first
 * line (the header) only from the first non-empty file.
 *
 * @param paths the CSV files to merge, in output order
 * @return the header followed by the data lines of all files
 * @throws IOException if any file cannot be read
 */
private static List<String> getMergedLines(List<Path> paths) throws IOException {
    final Charset utf8 = Charset.forName("UTF-8");
    final List<String> merged = new ArrayList<>();
    for (Path source : paths) {
        List<String> content = Files.readAllLines(source, utf8);
        if (content.isEmpty()) {
            continue; // nothing to contribute, not even a header
        }
        // While nothing has been collected yet the header row is kept (start at 0);
        // afterwards each file's header row is skipped (start at 1).
        int firstRow = merged.isEmpty() ? 0 : 1;
        merged.addAll(content.subList(firstRow, content.size()));
    }
    return merged;
}

答案 1 :(得分:3)

用Java来做这件事似乎有点小题大做。在Linux shell中这轻而易举:

# Emit FileA in full, then FileB from its second line onward (skipping its header), into FileC.
{ cat FileA; tail -n +2 FileB; } > FileC

答案 2 :(得分:2)

这应该有效。它检查正在合并的文件是否具有匹配的标头。否则会抛出异常。异常处理(关闭流等)已被留作练习。

// Appends the data rows of every file in listOfFilesToBeMerged to firstFile,
// after checking that each file's header row matches the header of firstFile.
// Throws FileMergeException on the first header mismatch. All streams are
// closed by try-with-resources, including when an exception is thrown.
String firstFile = "/path/to/firstFile.dat";

// Reference header, split on commas; stays null when firstFile has no lines.
String[] headers = null;
try (Scanner scanner = new Scanner(new File(firstFile))) {
    if (scanner.hasNextLine()) {
        headers = scanner.nextLine().split(",");
    }
}

Iterator<File> iterFiles = listOfFilesToBeMerged.iterator();

// Second FileWriter argument 'true' opens firstFile in append mode.
try (BufferedWriter writer = new BufferedWriter(new FileWriter(firstFile, true))) {
    while (iterFiles.hasNext()) {
        File nextFile = iterFiles.next();
        try (BufferedReader reader = new BufferedReader(new FileReader(nextFile))) {
            // The first line of each file is its header; null when the file is empty.
            String line = reader.readLine();
            String[] firstLine = (line != null) ? line.split(",") : null;

            if (!Arrays.equals(headers, firstLine)) {
                throw new FileMergeException("Header mis-match between CSV files: '" +
                        firstFile + "' and '" + nextFile.getAbsolutePath() + "'");
            }

            // Copy the remaining (data) lines; the header line was consumed above.
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
            }
        }
    }
}

答案 3 :(得分:1)

虽然回答得有点晚,但Fuzzy-Csv(https://github.com/kayr/fuzzy-csv/)正是为此而设计的。

这就是代码的样子

        // Two in-memory CSV documents whose headers overlap but differ:
        // csv2 has an extra MIDDLENAME column that csv1 lacks.
        String csv1 = "NAME,SURNAME,AGE\n" +
                "Fred,Krueger,Unknown";

        String csv2 = "NAME,MIDDLENAME,SURNAME,AGE\n" +
                "Jason,Noname,Scarry,16";

        // Turn each CSV string into a FuzzyCSVTable.
        FuzzyCSVTable t1 = FuzzyCSVTable.parseCsv(csv1);
        FuzzyCSVTable t2 = FuzzyCSVTable.parseCsv(csv2);

        // Merge t2 into t1. Going by the sample output shown after this snippet,
        // columns are matched by name, the result carries the union of the columns
        // (NAME, SURNAME, AGE, MIDDLENAME) and a missing value is printed as "-".
        FuzzyCSVTable output = t1.mergeByColumn(t2);

        // Print the merged table.
        output.printTable();

输出

╔═══════╤═════════╤═════════╤════════════╗
║ NAME  │ SURNAME │ AGE     │ MIDDLENAME ║
╠═══════╪═════════╪═════════╪════════════╣
║ Fred  │ Krueger │ Unknown │ -          ║
╟───────┼─────────┼─────────┼────────────╢
║ Jason │ Scarry  │ 16      │ Noname     ║
╚═══════╧═════════╧═════════╧════════════╝

您可以使用以下任一辅助方法重新导出CSV:

// Presumably writes the merged table as CSV to the given file path — confirm against the Fuzzy-CSV docs.
output.write("FilePath.csv");

或者

// Presumably returns the merged table as a CSV-formatted string — confirm against the Fuzzy-CSV docs.
output.toCsvString()

答案 4 :(得分:0)

之前:

  

idFile#x_y.csv

后:

  

idFile.csv

例如:

  

100#1_2.csv + 100#2_2.csv > 100.csv

100#1_2.csv包含:

"one","two","three"
"a","b","c"
"d","e","f"

100#2_2.csv包含:

"one","two","three"
"g","h","i"
"j","k","l"

100.csv包含:

"one","two","three"
"a","b","c"
"d","e","f"    
"g","h","i"
"j","k","l"

来源:

//MergeDemo.java
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
//import java.util.Arrays;
import java.util.Iterator;
import java.util.Scanner;

/**
 * Merges numbered CSV part files named {@code <id>#<i>_<n>.csv} into a single
 * {@code <id>.csv}, writing the header row only once at the top.
 */
public class MergeDemo {

    /** Directory the part files are read from by default. */
    private static final String INPUT_DIR = "C:\\in\\";

    /** Directory the merged file is written to by default. */
    private static final String OUTPUT_DIR = "C:\\out\\";

    /**
     * Demo entry point: merges the parts of id {@code 100}, split into 3 files.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String idFile = "100";
        int numFiles = 3;

        try {
            mergeCsvFiles(idFile, numFiles);
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    /**
     * Merges the parts of {@code idFile} from the default input directory into
     * the default output directory.
     *
     * @param idFile   common file-name prefix of the parts and name of the merged file
     * @param numFiles number of parts; part {@code i} is {@code <idFile>#<i>_<numFiles>.csv}
     * @throws IOException if a part cannot be read or the merged file cannot be written
     */
    private static void mergeCsvFiles(String idFile, int numFiles) throws IOException {
        mergeCsvFiles(new File(INPUT_DIR), new File(OUTPUT_DIR), idFile, numFiles);
    }

    /**
     * Merges parts {@code 1..numFiles} found in {@code inputDir} into
     * {@code <idFile>.csv} inside {@code outputDir}, replacing any existing file.
     *
     * <p>The first line of every part is treated as its header. The header of the
     * first non-empty part is written once; the header line of every other part
     * is dropped. Empty parts contribute nothing.
     *
     * @param inputDir  directory containing the part files
     * @param outputDir directory receiving the merged file
     * @param idFile    common file-name prefix of the parts and name of the merged file
     * @param numFiles  number of parts, at least 1
     * @throws IOException              if a part cannot be read or the merged file cannot be written
     * @throws IllegalArgumentException if {@code numFiles} is less than 1
     */
    private static void mergeCsvFiles(File inputDir, File outputDir, String idFile, int numFiles)
            throws IOException {

        if (numFiles < 1) {
            throw new IllegalArgumentException("numFiles must be at least 1, got " + numFiles);
        }

        File merged = new File(outputDir, idFile + ".csv");
        boolean headerWritten = false;

        // A non-append FileWriter creates the file or truncates an existing one,
        // so no separate delete/create step is needed.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(merged))) {
            for (int i = 1; i <= numFiles; i++) {
                File part = new File(inputDir, idFile + "#" + i + "_" + numFiles + ".csv");

                try (BufferedReader reader = new BufferedReader(new FileReader(part))) {
                    String line = reader.readLine();
                    if (line == null) {
                        continue; // empty part: no header and no data
                    }

                    // The first line of each part is its header; emit it only once.
                    if (!headerWritten) {
                        writer.write(line);
                        writer.newLine();
                        headerWritten = true;
                    }

                    while ((line = reader.readLine()) != null) {
                        writer.write(line);
                        writer.newLine();
                    }
                }
            }
        }
    }

}