数量和百分比

时间:2018-05-06 17:45:19

标签: bash awk

我想根据第 4 列和第 2 列生成一份报告,格式与下面显示的输出文件类似。我的代码可以正常工作,但我相信它还能写得更短 :)。

我对 split 部分有疑问。

CNTLM = split ("20,30,40,60", LMT)

它可以工作,但最好直接使用确切的值 "10,20,30,40",也就是第 4 列中实际出现的值。

4052538693,2910,04-May-2018-22,10
4052538705,2910,04-May-2018-22,10
4052538717,2910,04-May-2018-22,10
4052538729,2911,04-May-2018-22,20
4052538741,2911,04-May-2018-22,20
4052538753,2912,04-May-2018-22,20
4052538765,2912,04-May-2018-22,20
4052538777,2914,04-May-2018-22,10
4052538789,2914,04-May-2018-22,10
4052538801,2914,04-May-2018-22,30
4052539029,2914,04-May-2018-22,20
4052539041,2914,04-May-2018-22,20
4052539509,2915,04-May-2018-22,30
4052539521,2915,04-May-2018-22,30
4052539665,2915,04-May-2018-22,30
4052539677,2915,04-May-2018-22,10
4052539689,2915,04-May-2018-22,10
4052539701,2916,04-May-2018-22,40
4052539713,2916,04-May-2018-22,40
4052539725,2916,04-May-2018-22,40
4052539737,2916,04-May-2018-22,40
4052539749,2916,04-May-2018-22,40
4052539761,2917,04-May-2018-22,10
4052539773,2917,04-May-2018-22,10

这是我用来获取所需输出的代码。

#######################################
# Print a count/percentage report for a comma-separated file: one row per
# code (field 2), one column per value of field 4, plus row totals, column
# totals and the percentage of each column.
#
# Field 4 is matched exactly against the column labels. This replaces the
# earlier upper-limit list "20,30,40,60" tested with "<=", which put the
# value 20 into the first column. The header line is generated from the
# same labels and written to stdout, so no temporary "header" file and no
# GNU-only "wc -L" are needed.
#
# Arguments:
#   $1 - input file (default: file)
#   $2 - comma-separated field-4 values to use as columns, in display
#        order (default: 10,20,30,40)
# Outputs:
#   The report on stdout; a warning on stderr if rows were skipped.
# Returns:
#   The exit status of awk.
#######################################
code_report() {
  local input=${1:-file}
  local columns=${2:-10,20,30,40}

  awk -F',' -v columns="$columns" '
    BEGIN {
      # Map every column label to its position in the report.
      ncols = split(columns, label, ",")
      for (i = 1; i <= ncols; i++) colidx[label[i]] = i

      header = " Code"
      for (i = 1; i <= ncols; i++) header = header sprintf("%9s", label[i])
      header = header "      Total"

      # Separator line exactly as wide as the header.
      dashes = header
      gsub(/./, "-", dashes)

      sorter = "sort -nr"
    }

    {
      sub(/\r$/, "")              # tolerate CRLF line endings
      if (!($4 in colidx)) {      # field 4 is not a requested column
        if (NF) skipped++         # blank lines are ignored silently
        next
      }
      col = colidx[$4]
      cell[$2, col]++
      coltot[col]++
      rowtot[$2]++
      total++
    }

    END {
      print header
      print dashes
      # Flush before sort writes, so the header stays above the rows
      # even when stdout is a pipe or a file.
      fflush()

      for (code in rowtot) {
        line = sprintf("%5d", code)
        for (i = 1; i <= ncols; i++)
          line = line sprintf("%9d", cell[code, i] + 0)
        printf("%s  = %6d\n", line, rowtot[code]) | sorter
      }
      close(sorter)               # wait until the sorted rows are written

      print dashes
      printf("Total")
      for (i = 1; i <= ncols; i++) printf("%9d", coltot[i] + 0)
      printf("  = %6d\n", total + 0)
      print dashes

      printf("PCT  ")
      for (i = 1; i <= ncols; i++) {
        # Guard against division by zero when nothing was counted.
        pct = total ? coltot[i] / total * 100 : 0
        printf("%9.1f", pct)
      }
      printf("\n")
      print dashes

      if (skipped)
        printf("warning: skipped %d row(s) whose field 4 is not in \"%s\"\n",
               skipped, columns) > "/dev/stderr"
    }
  ' "$input"
}

code_report file

我收到的输出文件

 Code       10       20       30       40      Total
----------------------------------------------------
 2917        2        0        0        0  =      2
 2916        0        0        0        5  =      5
 2915        2        0        3        0  =      5
 2914        2        2        1        0  =      5
 2912        0        2        0        0  =      2
 2911        0        2        0        0  =      2
 2910        3        0        0        0  =      3
----------------------------------------------------
Total        9        6        4        5  =     24
----------------------------------------------------
PCT       37.5     25.0     16.7     20.8
----------------------------------------------------

如果代码还可以改进,不胜感激。

1 个答案:

答案 0 :(得分:1)

不含标题行和格式修饰……

#######################################
# Cross-tabulate a comma-separated file: one row per value of field 2
# (highest first), one column per distinct value of field 4, followed by
# a "Total" row. No header line is printed.
#
# Column labels are collected from the data and ordered numerically
# ascending, because the order of an awk "for (key in array)" loop is
# unspecified and would otherwise shuffle the unlabeled columns.
#
# Arguments:
#   $1 - input file (default: file)
# Outputs:
#   The table on stdout.
# Returns:
#   The exit status of awk.
#######################################
pivot_counts() {
  awk -F, '
    NF >= 4 { count[$2, $4]++; rowtot[$2]++; coltot[$4]++; total++ }

    END {
      # Insertion sort of the distinct field-4 values into col[1..n].
      n = 0
      for (c in coltot) {
        for (i = n++; i > 0 && col[i] + 0 > c + 0; i--) col[i + 1] = col[i]
        col[i + 1] = c
      }

      # Only the data rows go through sort, so the Total row stays last
      # regardless of how sort would rank the word "Total".
      sorter = "sort -nr"
      for (r in rowtot) {
        line = sprintf("%5s", r)
        for (i = 1; i <= n; i++) line = line sprintf("%10d", count[r, col[i]] + 0)
        printf("%s =%7d\n", line, rowtot[r]) | sorter
      }
      close(sorter)

      printf("%5s", "Total")
      for (i = 1; i <= n; i++) printf("%10d", coltot[col[i]])
      printf(" =%7d\n", total + 0)
    }
  ' "${1:-file}"
}

pivot_counts file

 2917         2         0         0         0 =      2
 2916         0         0         0         5 =      5
 2915         2         0         3         0 =      5
 2914         2         2         1         0 =      5
 2912         0         2         0         0 =      2
 2911         0         2         0         0 =      2
 2910         3         0         0         0 =      3
Total         9         6         4         5 =     24