#!/bin/bash
#######################################
# For every *.csv in the reference directory that has a same-named file in
# the second directory, write the lines of the second file that do not occur
# anywhere in the reference file to "<name>_diff" in the output directory.
# Arguments:
#   $1 - directory holding the reference *.csv files (default: /dir1)
#   $2 - directory holding the files to check (default: /dir2)
#   $3 - existing directory that receives the *_diff files (default: /dirDiff)
# Outputs:
#   Creates or truncates <$3>/<name>_diff for each compared pair.
#######################################
compare() {
  local ref_dir=${1:-/dir1}
  local new_dir=${2:-/dir2}
  local diff_dir=${3:-/dirDiff}
  local ref_file new_file name

  for ref_file in "$ref_dir"/*.csv; do
    name=${ref_file##*/}
    new_file=$new_dir/$name
    # Skip the literal pattern left behind when no *.csv exists, and any
    # reference file that has no counterpart in the second directory.
    [[ -e "$ref_file" && -e "$new_file" ]] || continue

    # Whole lines ($0) are compared, so the field separator does not affect
    # the result. FILENAME == ARGV[1] is used instead of NR == FNR because
    # the latter also holds for the second file when the first one is empty,
    # which would silently produce an empty diff.
    awk -F, '
      FILENAME == ARGV[1] { seen[$0]; next }
      !($0 in seen)
    ' "$ref_file" "$new_file" > "${diff_dir}/${name}_diff"
  done
}
#######################################
# Delete zero-size "*_diff" files so that only diffs with content remain.
# Arguments:
#   $1 - directory to clean (default: /dirDiff)
# Returns:
#   0 when nothing had to be removed; otherwise the status of the last rm.
#######################################
removeNULL() {
  local diff_dir=${1:-/dirDiff}
  local diff_file

  for diff_file in "$diff_dir"/*_diff; do
    # "! -s" alone is also true for a path that does not exist, e.g. the
    # literal unexpanded pattern when nothing matches; -f rules that out so
    # rm is never run on a non-existent name.
    if [[ -f "$diff_file" && ! -s "$diff_file" ]]; then
      # "command" bypasses any alias or function named rm.
      command rm -- "$diff_file"
    fi
  done
}
# Generate the *_diff files first, then prune the zero-size ones. The order
# matters: removeNULL scans the same /dirDiff/*_diff files compare writes.
compare
removeNULL
file1 和 file2 是来自两个不同来源的格式化文件。Source1 会在记录中间插入一个多余的换行符,把一条记录拆成两行,导致脚本比较失败并生成错误的 diff 输出。我希望脚本在比较 file1 和 file2 时忽略 Source1 引入的换行符。但是,我不确定脚本该如何区分真正的新记录和这种被额外插入的换行符。
file1:-
11447438218480362,6005560623,6005560623,11447438218480362,5,20160130103044,100,195031,,1,0,00,49256,0
,195031_5_00_6,0.1,6;
11447691224860640,6997557634,6997557634,11447691224860640,601511,20160130103457,500,195035,,2,0,00,45394,0
,195035_601511_00_6,0.5,6;
file2:-
11447438218480362,6005560623,6005560623,11447438218480362,5,20160130103044,100,195031,,1,0,00,49256,0,195031_5_00_6,0.1,6;
11447691224860640,6997557634,6997557634,11447691224860640,601511,20160130103457,500,195035,,2,0,00,45394,0,195035_601511_00_6,0.5,6;
感谢您的支持。
答案 0 :(得分:1)
您可以先用下面的命令预处理 file1,把不以 ; 结尾的行与其后续行连接起来:
sed -r ":again; /;$/! { N; s/(.+)[\r\n]+(.+)/\1\2/g; b again; }" file1
这样 file1 和 file2 就可以直接比较了。