awk排序计数值对重复项和转置列求和

时间:2019-01-18 01:00:26

标签: sorting awk count multiple-columns transpose

我想把输出整理成更易读的格式:按第4列和第1列分组(去重),统计第2列的状态(OK 表示正常,X 表示“KO”),并把第3列的 F1、F2…… 值转置到同一行;这些值并不是每组都全部出现,也不一定都是 OK 状态。

开始使用此输入:

输入:

NR5X1 OK F1 SEAT5
NR5X1 OK F2 SEAT5
NR5X1 X F3 SEAT5
NR5X1 X F4 SEAT5
NR5X1 X F5 SEAT5
NR5X1 X F6 SEAT5
NR5X1 X F7 SEAT5
NR5X1 X F8 SEAT5
NR5X2 OK F1 SEAT5
NR5X2 OK F2 SEAT5
NR5X2 X F3 SEAT5
NR5X2 X F4 SEAT5
NR5X2 X F5 SEAT5
NR5X2 X F6 SEAT5
NR5X2 X F7 SEAT5
NR5X2 X F8 SEAT5
NR5X3 OK F5 FLEET
NR5X3 OK F6 FLEET
NR5X5 OK F5 FLEET
NR5X5 OK F6 FLEET
NR5X7 F5 X ROME
NR5X7 F6 X ROME
NR5X8 F5 X ROME
NR5X8 F6 OK ROME

并尝试实现此输出

2 OK of 8 | SEAT5 NR5X1 OK F1 OK F2 X F3 X F4 X F5 X F6 X F7 X F8
2 OK of 8 | SEAT5 NR5X2 OK F1 OK F2 X F3 X F4 X F5 X F6 X F7 X F8
2 OK of 2 | FLEET NR5X3 OK F5 OK F6
2 OK of 2 | FLEET NR5X5 OK F5 OK F6
2 X  of 2 | ROME  NR5X7 X  F5 X  F6
1 OK of 2 | ROME  NR5X8 X  F5 OK F6

目前用下面的代码只完成了一部分:

cat file | awk '!seen[$1]++ {print $4,$1} {print $3,$2}',但如果再接 xargs,多个名称的内容会全部被合并到同一行,所以我卡在这里了

ROME NR5X7
X F5
X F6
ROME NR5X8
X F5
OK F6

不确定是否还有其他简单的方法可以美化输出,不过这是最后才需要考虑的事情。

欢迎任何建议,祝你有美好的一天。

2 个答案:

答案 0 :(得分:0)

尝试此Perl解决方案

$ seats.ksh ya801.txt
2 OK of 2 | FLEET NR5X3 OK F5 OK F6
1 OK of 2 | ROME NR5X8 X F5 OK F6
2 OK of 2 | FLEET NR5X5 OK F5 OK F6
2 OK of 8 | SEAT5 NR5X1 OK F1 OK F2 X F3 X F4 X F5 X F6 X F7 X F8
2 X of 2 | ROME NR5X7 X F5 X F6
2 OK of 8 | SEAT5 NR5X2 OK F1 OK F2 X F3 X F4 X F5 X F6 X F7 X F8

脚本:

$ cat seats.ksh
perl -lane '
# Summarise seat records, one output line per (TYPE, NAME) group:
#   "<n> OK of <total> | <TYPE> <NAME> <STATUS FVAL> ..."
# or, when the group has no OK record at all:
#   "<n> X of <total> | <TYPE> <NAME> <STATUS FVAL> ..."
# Input fields (split by -a): NAME STATUS FVAL TYPE.
# Groups are printed in the order they first appear in the input.
use strict;
use warnings;

# Package variables on purpose: -n wraps this code in a per-line loop,
# so lexical (my) accumulators would be reset on every input line.
our (%total, %ok_count, %x_count, %pairs, @order);

# Skip blank lines silently; report and skip records that are too short.
if (@F < 4) {
    warn "skipping line ", $., ": expected 4 fields\n" if @F;
    next;
}

my ($name, $status, $fval, $type) = @F;
my $group = "$type $name";

# Remember first-seen order so the report does not depend on hash order.
push @order, $group unless $total{$group}++;

# Autovivification creates the array on first use; no copy in and out.
push @{ $pairs{$group} }, "$status $fval";
$ok_count{$group}++ if $status eq "OK";
$x_count{$group}++  if $status eq "X";

END
{
        foreach my $seen (@order)
        {
         # Report the OK count when there is at least one OK, else the X count.
         my ($label, $n) = $ok_count{$seen}
                         ? ("OK", $ok_count{$seen})
                         : ("X",  $x_count{$seen} || 0);
         print "$n $label of $total{$seen} | $seen ", join(" ", @{ $pairs{$seen} })
        }
}
' "$@"
$

注意:输入的最后4行中,第2列和第3列似乎被交换了,所以我把它们换了回来。

$ cat ya801.txt
NR5X1 OK F1 SEAT5
NR5X1 OK F2 SEAT5
NR5X1 X F3 SEAT5
NR5X1 X F4 SEAT5
NR5X1 X F5 SEAT5
NR5X1 X F6 SEAT5
NR5X1 X F7 SEAT5
NR5X1 X F8 SEAT5
NR5X2 OK F1 SEAT5
NR5X2 OK F2 SEAT5
NR5X2 X F3 SEAT5
NR5X2 X F4 SEAT5
NR5X2 X F5 SEAT5
NR5X2 X F6 SEAT5
NR5X2 X F7 SEAT5
NR5X2 X F8 SEAT5
NR5X3 OK F5 FLEET
NR5X3 OK F6 FLEET
NR5X5 OK F5 FLEET
NR5X5 OK F6 FLEET
NR5X7 X F5 ROME 
NR5X7 X F6 ROME
NR5X8 X F5 ROME
NR5X8 OK F6 ROME
$

说明:

perl -lane '
# -n loops over the input lines, -a splits each line into @F,
# -l strips the newline on input and adds it back on print.
use strict;
use warnings;

# Accumulators must be package variables (our): the -n switch wraps this
# code in a per-line loop, so "my" variables would start empty on each line.
#   %total    - number of records per group, i.e. the 8 in "2 OK of 8"
#   %ok_count - number of records per group whose status is OK
#   %x_count  - number of records per group whose status is X
#   %pairs    - per group, the list of "STATUS FVAL" strings (columns 2 and 3)
#   @order    - group keys in the order they were first seen
our (%total, %ok_count, %x_count, %pairs, @order);

# Blank lines are ignored; records with fewer than 4 fields are reported
# on STDERR and ignored, so they cannot create a junk group.
if (@F < 4) {
    warn "skipping line ", $., ": expected 4 fields\n" if @F;
    next;
}

my ($name, $status, $fval, $type) = @F;

my $group = "$type $name";  # group key: column 4 and column 1 together

# Count the record, and note the key the first time it is seen so the
# report can follow input order instead of arbitrary hash order.
push @order, $group unless $total{$group}++;

# Append columns 2 and 3 to the list of this group. The array is created
# automatically on first use, so no copy-out / copy-back is needed.
push @{ $pairs{$group} }, "$status $fval";

$ok_count{$group}++ if $status eq "OK";  # count OK records
$x_count{$group}++  if $status eq "X";   # count X records

END
{
        foreach my $seen (@order)  # loop through the groups in input order
        {

         # If the group has at least one OK, print like "2 OK of 8";
         # otherwise fall back to the X count, like "2 X of 2".
         my ($label, $n) = $ok_count{$seen}
                         ? ("OK", $ok_count{$seen})
                         : ("X",  $x_count{$seen} || 0);

         # Summary, then the group key, then all "STATUS FVAL" pairs on one line.
         print "$n $label of $total{$seen} | $seen ", join(" ", @{ $pairs{$seen} })

        }
}
' "$@"

答案 1 :(得分:0)

$ cat tst.awk
# tst.awk -- summarise consecutive runs of records that share the same $1.
#
# Input : whitespace-separated records "NAME STATUS FVAL TYPE", where STATUS
#         is OK or X. Records whose STATUS and FVAL columns are swapped
#         ("NAME FVAL STATUS TYPE") are accepted as well.
# Output: one line per run,
#         "<n> <OK|X> of <total> | <TYPE> <NAME> <STATUS FVAL>..."
#         using the OK count when the run has any OK, otherwise the X count.
# A run ends when $1 changes, so the input must already be grouped by $1.

# Ignore blank lines so they never form a group of their own.
NF == 0 { next }

# $1 changed: flush the finished group (key/type still describe it here,
# because they are only updated by the next rule) and start a new one.
$1 != prev {
    prt()
    prev = $1
}

# Accumulate the current record into the open group.
{
    key  = $1
    type = $4
    # Find the status by its value instead of by TYPE, so both the swapped
    # and the corrected column layouts are read correctly.
    if ($2 != "OK" && $2 != "X" && ($3 == "OK" || $3 == "X")) {
        stat = $3
        fval = $2
    }
    else {
        stat = $2
        fval = $3
    }
    statCnt[stat]++
    totCnt++
    pairs = pairs OFS sprintf("%-2s %-2s",stat,fval)
}

# Flush the last group.
END { prt() }

# prt: print the summary line for the open group and reset the accumulators.
# Does nothing when no record has been accumulated (empty input, or the
# very first group change), so no "0 X of 0" line is ever printed.
# "stat" is a local variable.
function prt(   stat) {
    if (totCnt == 0)
        return
    stat = ("OK" in statCnt ? "OK" : "X")
    printf "%d %-2s of %d | %-5s %s%s\n", statCnt[stat], stat, totCnt, type, key, pairs
    delete statCnt
    totCnt = 0
    pairs  = ""
}

$ awk -f tst.awk file
2 OK of 8 | SEAT5 NR5X1 OK F1 OK F2 X  F3 X  F4 X  F5 X  F6 X  F7 X  F8
2 OK of 8 | SEAT5 NR5X2 OK F1 OK F2 X  F3 X  F4 X  F5 X  F6 X  F7 X  F8
2 OK of 2 | FLEET NR5X3 OK F5 OK F6
2 OK of 2 | FLEET NR5X5 OK F5 OK F6
2 X  of 2 | ROME  NR5X7 X  F5 X  F6
1 OK of 2 | ROME  NR5X8 X  F5 OK F6