我有2个CSV文件,我想用AWK把它们连接(join)起来。
file1.csv:
A1,B1,C1
"apple",1,2
"orange",2,3
"pear",5,4
file2.csv:
A2,D2,E2,F2
"apple",1,3,4
"peach",2,3,3
"pear",5,4,2
"mango",6,5,1
这是我想要的输出:
{{1}}
我想对 file1 和 file2 做完全外连接(full outer join),连接条件为 A1 = A2。file2 的行数多于 file1。对于在另一个文件中没有匹配值的记录,相应的列用 NULL 填充。
答案 0 :(得分:3)
为简单起见,可以使用标准的 join 实用程序。
注意:join需要排序输入,因此解决方案必须首先对输入进行排序
join 示例:
# join(1) requires both inputs to be sorted on the join field, using the same
# field delimiter and collation that join itself uses. Sort on the first
# comma-separated field only and pin the locale to C for both tools.
tail -n +2 file1.csv | LC_ALL=C sort -t, -k1,1 >file3.csv
tail -n +2 file2.csv | LC_ALL=C sort -t, -k1,1 >file4.csv
# Header row: the first line of each file, glued together with a comma.
paste -d, file1.csv file2.csv | head -n 1 >output.txt
# -a 1 -a 2 keeps unpairable rows from both sides (full outer join);
# -e NULL fills the -o fields that are missing for such rows.
LC_ALL=C join -a 1 -a 2 -t , -e NULL -1 1 -2 1 \
  -o 1.1,1.2,1.3,2.1,2.2,2.3,2.4 \
  file3.csv file4.csv >>output.txt
**输出**
A1,B1,C1,A2,D2,E2,F2
"apple",1,2,"apple",1,3,4
NULL,NULL,NULL,"mango",6,5,1
"orange",2,3,NULL,NULL,NULL,NULL
NULL,NULL,NULL,"peach",2,3,3
"pear",5,4,"pear",5,4,2
答案 1 :(得分:2)
您可以使用下面的 awk 命令:
awk -F, '
  # Header line of either file: print the fixed combined header once (on the
  # very first line read) and skip the line itself.
  FNR == 1 {
    if (NR == 1)
      print "A1,B1,C1,A2,D2,E2,F2"
    next
  }
  # While reading the first file given (file2.csv): remember each row by key.
  FNR == NR {
    right[$1] = $0
    next
  }
  # Rows of file1.csv: append the stored file2 row, or NULL padding if none.
  {
    if ($1 in right)
      tail = right[$1]
    else
      tail = "NULL,NULL,NULL,NULL"
    print $0 FS tail
    delete right[$1]
  }
  # Rows still stored never matched a file1 row.
  END {
    for (key in right)
      print "NULL,NULL,NULL," right[key]
  }
' file2.csv file1.csv
A1,B1,C1,A2,D2,E2,F2
"apple",1,2,"apple",1,3,4
"orange",2,3,NULL,NULL,NULL,NULL
"pear",5,4,"pear",5,4,2
NULL,NULL,NULL,"mango",6,5,1
NULL,NULL,NULL,"peach",2,3,3
答案 2 :(得分:1)
试试这个:
awk -F',' '
  # While reading the first file (file1.csv): keep its header line and index
  # the remaining rows by their first field.
  NR == FNR {
    if (FNR == 1)
      head = $0
    else
      left[$1] = $0
    next
  }
  # Header line of file2.csv: emit the combined header.
  FNR == 1 {
    print head "," $0
    next
  }
  # Data row of file2.csv. The two halves are concatenated with an explicit
  # comma; "print x, y" would separate them with OFS, which is a space.
  # "in" tests membership without creating an empty array entry.
  {
    if ($1 in left) {
      print left[$1] "," $0
      delete left[$1]
    } else {
      print "NULL,NULL,NULL," $0
    }
  }
  # Rows of file1.csv that no file2.csv row consumed.
  END {
    for (key in left)
      if (left[key] != "")
        print left[key] ",NULL,NULL,NULL,NULL"
  }
' file1.csv file2.csv
输出:
A1,B1,C1,A2,D2,E2,F2
"apple",1,2 "apple",1,3,4
NULL,NULL,NULL,"peach",2,3,3
"pear",5,4 "pear",5,4,2
NULL,NULL,NULL,"mango",6,5,1
"orange",2,3,NULL,NULL,NULL,NULL
答案 3 :(得分:0)
为了完整起见,这个问题也可以用纯 bash 脚本解决,只是有一些限制。下面的脚本适用于本例,但它要求相互匹配的行位于两个输入文件的同一行号上。如果 csv 文件不满足这一点,则需要先对文件做一次简单的预排序。
这与其说是一个有竞争力的方案,不如说是一次练习,不过它最终满足了要求,输出顺序与其他一些方案相同甚至更好。如果您有任何疑问,请告诉我:
#!/bin/bash
#
# Positional full outer join of two CSV files on their first column.
#
# Usage: SCRIPT file1.csv file2.csv
#
# Row i of file1 is compared with row i of file2. Row 0 (the header) is always
# merged. For every other row, equal first fields produce one merged line;
# different first fields produce two lines, each padded with NULL on the side
# that has no partner. Rows beyond the end of the shorter file are padded the
# same way. Output goes to stdout.
#
# Limitations: matching is by line position, so rows that should pair up must
# sit on the same line number in both files (pre-sort if necessary). Fields
# are split on every comma; quoted fields containing commas are not supported.
# Blank lines are ignored.

#######################################
# Print N comma-separated NULL placeholders (nothing when N is 0).
# Arguments: $1 - number of placeholders
# Outputs:   the placeholder list on stdout, without a trailing newline
#######################################
null_fields() {
  local -i count=$1 k
  local out=""
  for ((k = 0; k < count; k++)); do
    out+="${out:+,}NULL"
  done
  printf '%s' "$out"
}

#######################################
# Print the number of comma-separated fields in a line.
# Arguments: $1 - the line
# Outputs:   the field count on stdout
#######################################
count_fields() {
  local commas=${1//[^,]/}
  printf '%d' $((${#commas} + 1))
}

#######################################
# Join the two files and write the result to stdout.
# Arguments: $1 - left CSV file, $2 - right CSV file
# Returns:   0 on success, 2 on usage error, 1 on unreadable or empty input
#######################################
main() {
  if (($# != 2)); then
    printf 'usage: %s file1.csv file2.csv\n' "${0##*/}" >&2
    return 2
  fi

  local file1=$1 file2=$2 line path
  local -a rows1=() rows2=()

  for path in "$file1" "$file2"; do
    if [[ ! -r $path ]]; then
      printf 'error: cannot read %s\n' "$path" >&2
      return 1
    fi
  done

  # Rows are kept as whole strings so that spaces and glob characters inside
  # a field are never subject to word splitting or pathname expansion.
  # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    [[ -n $line ]] || continue
    rows1+=("$line")
  done <"$file1"
  while IFS= read -r line || [[ -n $line ]]; do
    [[ -n $line ]] || continue
    rows2+=("$line")
  done <"$file2"

  local -i n1=${#rows1[@]} n2=${#rows2[@]}
  if ((n1 == 0 || n2 == 0)); then
    printf 'error: both input files need at least a header row\n' >&2
    return 1
  fi

  # The header row defines how many NULLs pad the missing side.
  local pad1 pad2
  pad1=$(null_fields "$(count_fields "${rows1[0]}")")
  pad2=$(null_fields "$(count_fields "${rows2[0]}")")

  local -i i common=$((n1 < n2 ? n1 : n2))

  # Rows present in both files.
  for ((i = 0; i < common; i++)); do
    if ((i == 0)) || [[ ${rows1[i]%%,*} == "${rows2[i]%%,*}" ]]; then
      printf '%s,%s\n' "${rows1[i]}" "${rows2[i]}"
    else
      printf '%s,%s\n' "${rows1[i]}" "$pad2"
      printf '%s,%s\n' "$pad1" "${rows2[i]}"
    fi
  done

  # Excess rows of whichever file is longer (at most one loop runs).
  for ((i = common; i < n1; i++)); do
    printf '%s,%s\n' "${rows1[i]}" "$pad2"
  done
  for ((i = common; i < n2; i++)); do
    printf '%s,%s\n' "$pad1" "${rows2[i]}"
  done
}

main "$@"
**输出**
$ bash ../read2redir.sh A1.txt A2.txt
A1,B1,C1,A2,D2,E2,F2
"apple",1,2,"apple",1,3,4
"orange",2,3,NULL,NULL,NULL,NULL
NULL,NULL,NULL,"peach",2,3,3
"pear",5,4,"pear",5,4,2
NULL,NULL,NULL,"mango",6,5,1