想要打印连续序列号的范围:以第一个字段确定起始序列号和结束序列号,并与 $2、substr($3,1,9)、substr($4,4,6)、$6、$8、$10 这些字段组合输出。输入文件未按第一列排序。
INPUT.TXT
11,abc,22-JUN-12.08:06:03,22-JUN-12.08:06:03,19-Apr-16,1,INR,RO0412,RC03,L7,,31
12,abc,22-JUN-12.08:06:03,22-JUN-12.08:06:03,19-Apr-16,1,INR,RO0412,RC03,L7,,31
13,abc,22-JUN-12.08:06:03,22-JUN-12.08:06:03,19-Apr-16,1,INR,RO0412,RC03,L7,,31
14,abc,30-JUN-12.01:06:49,30-JUN-12.01:06:49,19-Apr-16,1,INR,RO0412,RC03,L7,,29
28,abc,30-JUN-12.01:06:49,30-JUN-12.01:06:49,19-Apr-16,1,INR,RO0412,RC03,L7,,29
32,def,29-MAY-13.12:05:11,29-MAY-13.12:05:11,15-Feb-17,1350,INR,RO0213,CD,K1,,30
33,def,29-MAY-13.12:05:11,29-MAY-13.12:05:11,15-Feb-17,1350,INR,RO0213,CD,K1,,30
41,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
50,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
51,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
52,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
仅针对第一个字段尝试了此命令并获得了部分输出:
# Sort the comma-delimited lines with a key that starts at field 1, then let
# awk print every run of consecutive field-1 values as "start - end".
# Only field 1 is compared, so rows whose other fields differ are still
# merged into the same run.
cat Input.txt | sort -k1 -t,| awk -F, 'NR==1 {a=$1;b=$1;next} ($1 != b+1){print a,"-",b; a=$1} {b=$1} END{print a,"-",b}'
11 - 14
28 - 28
32 - 33
41 - 41
50 - 52
期望的输出:
abc,22-JUN-12,JUN-12,1,RO0412,L7,11,13
abc,30-JUN-12,JUN-12,1,RO0412,L7,14,14
abc,30-JUN-12,JUN-12,1,RO0412,L7,28,28
def,29-MAY-13,MAY-13,1350,RO0213,K1,32,33
abc,20-FEB-14,FEB-14,650,EN1113,S317,41,41
abc,20-FEB-14,FEB-14,650,EN1113,S317,50,52
编辑:已将示例输入更新为未排序的版本。Ed Morton,你是对的,我的实际输入文件并未排序,想知道如何处理下面的样本。
13,abc,22-JUN-12.08:06:03,22-JUN-12.08:06:03,19-Apr-16,1,INR,RO0412,RC03,L7,,31
14,abc,30-JUN-12.01:06:49,30-JUN-12.01:06:49,19-Apr-16,1,INR,RO0412,RC03,L7,,29
11,abc,22-JUN-12.08:06:03,22-JUN-12.08:06:03,19-Apr-16,1,INR,RO0412,RC03,L7,,31
12,abc,22-JUN-12.08:06:03,22-JUN-12.08:06:03,19-Apr-16,1,INR,RO0412,RC03,L7,,31
28,abc,30-JUN-12.01:06:49,30-JUN-12.01:06:49,19-Apr-16,1,INR,RO0412,RC03,L7,,29
32,def,29-MAY-13.12:05:11,29-MAY-13.12:05:11,15-Feb-17,1350,INR,RO0213,CD,K1,,30
33,def,29-MAY-13.12:05:11,29-MAY-13.12:05:11,15-Feb-17,1350,INR,RO0213,CD,K1,,30
41,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
50,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
52,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
51,abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,31-Dec-20,650,INR,EN1113,ch650,S317,,28
答案 0 :(得分:2)
像这样的脚本可能可以做到。我希望有人不只是做一个浓缩版本:
#!/usr/bin/awk -f
# Collapse comma-separated records into one output line per distinct
# combination of fields 2, 3, 4, 6, 8 and 10. Each line carries that
# combination followed by the smallest and largest field-1 value seen for it.
# Groups are printed in order of first appearance; input need not be sorted.
BEGIN {
    FS = OFS = ","
}
{
    # sub(/[.].*$/, "", $3) ## Uncomment if you don't want to include the time.
    # sub(/[.].*$/, "", $4) ## Uncomment if you don't want to include the time.
    group = $2 "," $3 "," $4 "," $6 "," $8 "," $10
    if (group in lo) {
        # Known group: widen the range only when this record falls outside it.
        if ($1 < lo[group])
            lo[group] = $1
        else if ($1 > hi[group])
            hi[group] = $1
    } else {
        # New group: the range starts and ends at this record.
        lo[group] = hi[group] = $1
        order[count++] = group
    }
}
END {
    # Replay the groups in first-seen order.
    for (i = 0; i < count; i++) {
        group = order[i]
        print group, lo[group], hi[group]
    }
}
也许类似:
#!/usr/bin/awk -f
# Pattern-action variant: for every distinct combination of fields 2, 3, 4, 6,
# 8 and 10, track the smallest (s) and largest (e) field-1 value, then print
# one line per combination in order of first appearance.
BEGIN {
    FS = OFS = ","
}
# Runs for every record: build the grouping key used by the rules below.
{
    # sub(/[.].*$/, "", $3)
    # sub(/[.].*$/, "", $4)
    key = $2 "," $3 "," $4 "," $6 "," $8 "," $10
}
# First record for this key. Membership is tested with "in" rather than the
# truth of s[key], so a stored start of 0 (or an empty field 1) is not mistaken
# for "never seen", which would register and print the key more than once.
!(key in s) {
    s[key] = e[key] = $1
    keys[k++] = key
    next
}
# Record extends the range downwards.
$1 < s[key] {
    s[key] = $1
    next ## Optional.
}
# Record extends the range upwards.
$1 > e[key] {
    e[key] = $1
}
END {
    for (k = 0; k in keys; ++k) {
        key = keys[k]
        print key, s[key], e[key]
    }
}
使用
awk -f script.awk file
输出:
abc,22-JUN-12.08:06:03,22-JUN-12.08:06:03,1,RO0412,L7,11,13
abc,30-JUN-12.01:06:49,30-JUN-12.01:06:49,1,RO0412,L7,14,28
def,29-MAY-13.12:05:11,29-MAY-13.12:05:11,1350,RO0213,K1,32,33
abc,20-FEB-14.11:02:37,20-FEB-14.11:02:37,650,EN1113,S317,41,52
排除时间的输出(取消注释 sub() 行):
abc,22-JUN-12,22-JUN-12,1,RO0412,L7,11,13
abc,30-JUN-12,30-JUN-12,1,RO0412,L7,14,28
def,29-MAY-13,29-MAY-13,1350,RO0213,K1,32,33
abc,20-FEB-14,20-FEB-14,650,EN1113,S317,41,52
答案 1 :(得分:0)
我相信这会产生你想要的输出(与你所显示的相同)。
# Sort numerically on field 1 only (a plain -k1 key compares text from field 1
# to the end of the line, which would place 100 before 11), then collapse runs
# of consecutive sequence numbers that share the same date prefix of field 3.
sort -t, -k1,1n Input.txt |
awk '
# Print the range being accumulated: saved fields, then first and last number.
function prn() {print f2,d1,substr(f4,1,6),f6,f8,f10,n1,n2}
# Remember the fields of the record that opens a new range.
function sav() {n1=$1;d1=d;f2=$2;f4=$4;f6=$6;f8=$8;f10=$10}
BEGIN {FS=OFS=","}
# d is the first 9 characters of field 3 (the date without the time).
{d = substr($3,1,9)}
NR == 1 {sav(); n2=n1; next}
# A gap in the numbering or a change of date closes the current range.
$1 != n2 + 1 || d1 != d {prn(); sav()}
{n2=$1}
# Nothing to flush when the input was empty.
END {if (NR) prn()}
'
我假设您实际上想要的是字段 4 的前 6 个字符(日和月),而不是最后 6 个字符(月和年)。
答案 2 :(得分:0)
$ cat tst.awk
# Print one line per run of consecutive field-1 values that share the same key
# (fields 2, 6, 8, 10 plus substrings of fields 3 and 4), followed by the first
# and last sequence number of the run.
# Only adjacent records are compared, so the input has to arrive ordered by
# field 1 for runs to be detected.
BEGIN{ FS=OFS="," }
{
    seq = $1
    key = $2 FS substr($3,1,9) FS substr($4,4,6) FS $6 FS $8 FS $10
    # A gap in the numbering or a different key starts a new run.
    if ( (seq != (prevSeq+1)) || (key != prevKey) ) {
        # Flush the previous run; there is none before the first record.
        if (NR > 1)
            print prevKey, startSeq, prevSeq
        startSeq = seq
    }
    prevSeq = seq
    prevKey = key
}
END {
    # Flush the final run, but print nothing for empty input.
    if (NR)
        print prevKey, startSeq, prevSeq
}
$
$ awk -f tst.awk file
abc,22-JUN-12,JUN-12,1,RO0412,L7,11,13
abc,30-JUN-12,JUN-12,1,RO0412,L7,14,14
abc,30-JUN-12,JUN-12,1,RO0412,L7,28,28
def,29-MAY-13,MAY-13,1350,RO0213,K1,32,33
abc,20-FEB-14,FEB-14,650,EN1113,S317,41,41
abc,20-FEB-14,FEB-14,650,EN1113,S317,50,52