Unix:合并两个表头不同的 CSV 文件

时间:2016-10-06 11:18:09

标签: unix

file1.csv

header1, header2, header3, header4, header5
a, b, c, d, e 

2 个答案:

答案 0 :(得分:0)

您可以尝试使用 paste 命令:

# Print file1.csv in full, then the data rows of file2.csv with an empty
# header2 cell inserted after the first column.
# -s makes paste emit each input serially (one whole file after the other);
# without it, paste -d'\n' interleaves the two inputs line by line, which puts
# the file2 row between the header and the data row of file1.csv.
paste -s -d'\n' file1.csv <(awk -F',' 'NR>1{print $1",  ,"$2","$3","$4}' file2.csv)
或者使用 awk:

# Print file1.csv unchanged, then every data row of file2.csv with an empty
# header2 cell inserted after the first column.
awk '
    # First file: pass every line (header included) straight through.
    NR == FNR { print; next }

    # Second file: skip its header line. Fields are split on whitespace, so
    # $1..$3 keep their trailing commas and only the empty cell needs adding.
    FNR > 1 { print $1 "  , " $2 " " $3 " " $4 }
' file1.csv file2.csv

例如:

user@host:/tmp/test$ awk 'NR==FNR{print; next} FNR>1{print $1"  , "$2" "$3" "$4}' file1.csv file2.csv
header1, header2, header3, header4, header5
a, b, c, d, e
j,  , w, c, a

答案 1 :(得分:0)

下面是更通用的做法:记录所有文件中出现过的全部表头

数据:

$ cat file1.csv
header1, header2, header3, header4, header5
a, b, c, d, e

$ cat file2.csv
header1, header3, header4, header5
j, w, c, a

$ cat file3.csv
header1, header2, header3, header5, header6
1, 2, 3, 4, 5

file1缺少header6
file2缺少header2和header6
file3缺少header4

代码:

# Merge CSV files whose header rows differ. The output has one column per
# distinct header name, in first-seen order, followed by every data row of
# every input file; a cell is left empty where a file lacks that column.
# Input fields are split on ", " (FS) and output cells are joined with the
# same text via OFS.
awk -F ", " -v OFS=", " '
    # Header line of each input file: record its column names by position
    # and register any name that no earlier file has introduced.
    FNR == 1 {
        delete this_file_headers
        for (i = 1; i <= NF; i++) {
            this_file_headers[i] = $i
            if (!($i in all_headers)) {
                all_headers[$i]                     # bare reference creates the key
                all_headers_ordered[++nhead] = $i   # remember first-seen order
            }
        }
        next
    }

    # Data line: store each cell under (global row number, column name).
    {
        lineno++
        for (i = 1; i <= NF; i++)
            data[lineno, this_file_headers[i]] = $i
    }

    # Printing is deferred to END because a later file can still add columns.
    END {
        sep = ""
        for (i = 1; i <= nhead; i++) {
            printf "%s%s", sep, all_headers_ordered[i]
            sep = OFS
        }
        print ""

        for (i = 1; i <= lineno; i++) {
            sep = ""
            for (j = 1; j <= nhead; j++) {
                # A column missing from this row reads back as an empty string.
                printf "%s%s", sep, data[i, all_headers_ordered[j]]
                sep = OFS
            }
            print ""
        }
    }
' file{1,2,3}.csv

输出

header1, header2, header3, header4, header5, header6
a, b, c, d, e, 
j, , w, c, a, 
1, 2, 3, , 4, 5