使用CSV文件的摘要数据

时间:2018-08-11 18:43:16

标签: awk

我想在所需的输出文件中添加更多详细信息。

请提供以下代码的改进选项,以获取所需的文件。

代码

awk -F, '{a[$2$7]+=$4}{b[$2$7]+=$5}{c[$2$7]+=$6}END{for(i in a)print i,a[i],b[i],c[i]}' tmp3 | sort -t, -k1n |
awk 'BEGIN{
print ("\tCODE-1T     COD-Area   CODE-1      CODE-S       CODE-T")
printf ("\t------------------------------------------------------------\n")
}
{
sum2 += $2;
sum3 += $3;
sum4 += $4;
sum5 = sum2 + sum3 + sum4;
printf ("\t%9s%10s%12s%12d%16d\n",substr($0,1,9),substr($0,10,5),$2,$3,$4)
}
END {
printf ("\t------------------------------------------------------------------------\n")
printf ("\tTotal:\t%23d\t%11d\t%11d\t%4d\n",sum2,sum3,sum4,sum5)
printf ("\t------------------------------------------------------------------------\n")

输入文件

032118,333000004,3213,11,10,142,SS/RR
032118,333000004,3214,11,0,42,AS/RR
032118,333000004,3215,11,0,761,AS/RR
032118,333000005,3216,7,2,762,SS/RR
032118,333000005,3217,6,2,876,SS/RR
032118,333000005,3218,6,0,876,ST/RR
032118,333000005,3222,5,3,258,ST/RR
032118,333000006,3223,5,3,258,ST/RR
032118,333000006,3224,4,4,870,SS/RR
032118,333000006,3225,3,5,870,SS/RR
032118,333000007,3226,3,34,876,SX/RR
032118,333000007,3227,2,55,876,SS/RR
032218,333000007,3208,2,4,36,SS/RR
032218,333000007,3209,1,3,879,ST/RR
032218,333000007,3210,2,2,803,ST/RR

我得到了此输出文件

CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T
------------------------------------------------------------
333000004     AS/RR          22           0             803
333000004     SS/RR          11          10             142
333000005     SS/RR          13           4            1638
333000005     ST/RR          11           3            1134
333000006     SS/RR           7           9            1740
333000006     ST/RR           5           3             258
333000007     SS/RR           4          59             912
333000007     ST/RR           3           5            1682
333000007     SX/RR           3          34             876
------------------------------------------------------------------------
Total:                       79         127        9185 9391
------------------------------------------------------------------------

所需的输出如下

AWK有可能吗?在下面的示例中。

CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T
------------------------------------------------------------
333000004     AS/RR          22           0             803
333000004     SS/RR          11          10             142
Total                        33          10             945      988
---------------------------------------------------------------------
333000005     SS/RR          13           4            1638
333000005     ST/RR          11           3            1134
Total                        24           7            2772     2803
---------------------------------------------------------------------
333000006     SS/RR           7           9            1740
333000006     ST/RR           5           3             258
Total                        12          12            1998     2202
---------------------------------------------------------------------
333000007     SS/RR           4          59             912
333000007     ST/RR           3           5            1682
333000007     SX/RR           3          34             876
Total                        10          98            3470     3578
---------------------------------------------------------------------
---------------------------------------------------------------------
Gran Total:                 79          127            9185     9391
---------------------------------------------------------------------

预先感谢

2 个答案:

答案 0 :(得分:3)

编辑: 根据OP第4和5列的零值,先前的代码无法正常工作,因此现在可以对其进行修复。

awk '
BEGIN{                                                      ##Starting BEGIN section here of awk.
   FS=","                                                   ##Setting FS as comma here.
   OFS="\t\t"                                               ##Setting OFS as 2 TABs as output field separator.
   s1="------------------------------------------------------------------------------------------------"   ##Setting s1 as dashes.
   print "CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T" ORS s1  ##printing headers before output prints.
}
FNR==NR{                                                    ##Putting condition to check FNR==NR which will be TRUE when first time Input_file is being read.
   code1[$2,$NF]+=$4                                        ##Creating array code1 index is $2,$NF value is $4 and adding to itself.
   codes[$2,$NF]+=$5                                        ##Creating array codes index is $2,$NF value is $5 and adding to itself.
   codet[$2,$NF]+=$6                                        ##Creating array codet index is $2,$NF value is $6 and adding to itself.
   next                                                     ##next will skip all further statements from here.
}
prev!=$2 && prev{                                           ##checking condition prev is NOT equal to $2 and prev is NOT NULL then do following.
   sum_col=val1+val2+val3                                   ##creating sum_col whose value is val1+val2+val3.
   SUM+=sum_col                                             ##creating SUM whose value is sum_col and adding to itself too.
   sum_val1+=val1                                           ##Creating variable sum_val1 whose value is val1 and adding to itself.
   sum_val2+=val2                                           ##Creating variable sum_val2 whose value is val2 and adding to itself.
   sum_val3+=val3                                           ##Creating variable sum_val3 whose value is val3 and adding to itself.
   print "Total\t\t\t\t\t"val1,val2,val3,sum_col ORS s1              ##Printing 3 TABs then value of val1, val2, val3, sum_col ORS and s1 value now.
   val1=val2=val3=""                                        ##Nullifying values of val1, val2 and val3 here.
}
code1[$2,$NF]!=""{                                              ##Checking if array code1 value whose index is $1,$NF is NOT NULL then do following.
   print $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]   ##Printing values of $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]
   val1+=code1[$2,$NF]                                      ##Creating variable val1 who is array code1 value and adding to itself.
   val2+=codes[$2,$NF]                                      ##Creating variable val2 who is array codes value and adding to itself.
   val3+=codet[$2,$NF]                                      ##Creating variable val3 who is array codet value and adding to itself.
   delete code1[$2,$NF]                                     ##Deleting array code1 whose index is $2,$NF here.
}
{
   prev=$2                                                  ##Setting prev value to $2.
}
END{                                                        ##Starting END block of awk here now.
   if(val1){                                                ##Checking condition if variable val1 is NOT NULL then do following.
      sum_col=val1+val2+val3                                ##Creating sum_col whose value is addition of val1+val2+val3.
      sum_val1+=val1                                        ##Creating sum_val1 whose value is addition of sum_val1 abd val1 values.
      sum_val2+=val2                                        ##Creating sum_val2 whose value is addition of sum_val2 abd val2 values.
      sum_val3+=val3                                        ##Creating sum_val3 whose value is addition of sum_val3 abd val3 values.
      print "\t\t\t"val1,val2,val3,sum_col                  ##Printing 3 TABs and value of val1, val2, val3 and sum_col.
   }
   print s1 ORS s1 ORS "Grand Total:\t\t",sum_val1,sum_val2,sum_val3,SUM+sum_col ORS s1  ##Printing s1 ORS s1 and values of sum_val1,sum_val2,sum_val3,SUM+sum_col s1.
}' Input_file  Input_file                                  ##mentioning Input_file 2 times here.


请您尝试以下。

awk -F, '
BEGIN{
   s1="------------------------------------------------------------------------------------------------"
   print "CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T" ORS s1
}
FNR==NR{
   code1[$2,$NF]+=$4
   codes[$2,$NF]+=$5
   codet[$2,$NF]+=$6
   next
}
prev!=$2 && prev{
   sum_col=val1+val2+val3
   SUM+=sum_col
   sum_val1+=val1
   sum_val2+=val2
   sum_val3+=val3
   print "\t\t\t"val1,val2,val3,sum_col ORS s1
   val1=val2=val3=""
}
code1[$2,$NF]{
   print $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]
   val1+=code1[$2,$NF]
   val2+=codes[$2,$NF]
   val3+=codet[$2,$NF]
   delete code1[$2,$NF]
}
{
   prev=$2
}
END{
   if(val1){
      sum_col=val1+val2+val3
      sum_val1+=val1
      sum_val2+=val2
      sum_val3+=val3
      print "\t\t\t"val1,val2,val3,sum_col
   }
   print s1 ORS s1 ORS "Grand Total:\t\t",sum_val1,sum_val2,sum_val3,SUM+sum_col ORS s1
}'  OFS="\t\t"  Input_file  Input_file

说明: 也在此处添加说明。

awk '
BEGIN{                                                      ##Starting BEGIN section here of awk.
   FS=","                                                   ##Setting FS as comma here.
   OFS="\t\t"                                               ##Setting OFS as 2 TABs as output field separator.
   s1="------------------------------------------------------------------------------------------------"   ##Setting s1 as dashes.
   print "CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T" ORS s1  ##printing headers before output prints.
}
FNR==NR{                                                    ##Putting condition to check FNR==NR which will be TRUE when first time Input_file is being read.
   code1[$2,$NF]+=$4                                        ##Creating array code1 index is $2,$NF value is $4 and adding to itself.
   codes[$2,$NF]+=$5                                        ##Creating array codes index is $2,$NF value is $5 and adding to itself.
   codet[$2,$NF]+=$6                                        ##Creating array codet index is $2,$NF value is $6 and adding to itself.
   next                                                     ##next will skip all further statements from here.
}
prev!=$2 && prev{                                           ##checking condition prev is NOT equal to $2 and prev is NOT NULL then do following.
   sum_col=val1+val2+val3                                   ##creating sum_col whose value is val1+val2+val3.
   SUM+=sum_col                                             ##creating SUM whose value is sum_col and adding to itself too.
   sum_val1+=val1                                           ##Creating variable sum_val1 whose value is val1 and adding to itself.
   sum_val2+=val2                                           ##Creating variable sum_val2 whose value is val2 and adding to itself.
   sum_val3+=val3                                           ##Creating variable sum_val3 whose value is val3 and adding to itself.
   print "\t\t\t"val1,val2,val3,sum_col ORS s1              ##Printing 3 TABs then value of val1, val2, val3, sum_col ORS and s1 value now.
   val1=val2=val3=""                                        ##Nullifying values of val1, val2 and val3 here.
}
code1[$2,$NF]{                                              ##Checking if array code1 value whose index is $1,$NF is NOT NULL then do following.
   print $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]   ##Printing values of $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]
   val1+=code1[$2,$NF]                                      ##Creating variable val1 who is array code1 value and adding to itself.
   val2+=codes[$2,$NF]                                      ##Creating variable val2 who is array codes value and adding to itself.
   val3+=codet[$2,$NF]                                      ##Creating variable val3 who is array codet value and adding to itself.
   delete code1[$2,$NF]                                     ##Deleting array code1 whose index is $2,$NF here.
}
{
   prev=$2                                                  ##Setting prev value to $2.
}
END{                                                        ##Starting END block of awk here now.
   if(val1){                                                ##Checking condition if variable val1 is NOT NULL then do following.
      sum_col=val1+val2+val3                                ##Creating sum_col whose value is addition of val1+val2+val3.
      sum_val1+=val1                                        ##Creating sum_val1 whose value is addition of sum_val1 abd val1 values.
      sum_val2+=val2                                        ##Creating sum_val2 whose value is addition of sum_val2 abd val2 values.
      sum_val3+=val3                                        ##Creating sum_val3 whose value is addition of sum_val3 abd val3 values.
      print "\t\t\t"val1,val2,val3,sum_col                  ##Printing 3 TABs and value of val1, val2, val3 and sum_col.
   }
   print s1 ORS s1 ORS "Grand Total:\t\t",sum_val1,sum_val2,sum_val3,SUM+sum_col ORS s1  ##Printing s1 ORS s1 and values of sum_val1,sum_val2,sum_val3,SUM+sum_col s1.
}'  Input_file  Input_file                                 ##mentioning Input_file 2 times here.

答案 1 :(得分:2)

不是完整的解决方案(我现在对格式没有足够的耐心,但是在一个字段上显示了小计的主要思想,您可以概括和复制其他格式...

$ awk -F, '{k=$2 OFS $7; f2[$2]; f7[$7]; f4[k]+=$4; f5[k]+=$5; f6[k]+=$6} 
        END{for(i2 in f2) 
              {s4=0; 
               for(i7 in f7) 
                  {k=i2 OFS i7; 
                   if(k in f4) 
                      {print k, f4[k]; 
                       s4+=f4[k]}} 
               print "Total","-",s4}}' file | column -t


333000004  SS/RR  11
333000004  AS/RR  22
Total      -      33
333000005  SS/RR  13
333000005  ST/RR  11
Total      -      24
333000006  SS/RR  7
333000006  ST/RR  5
Total      -      12
333000007  SS/RR  4
333000007  SX/RR  3
333000007  ST/RR  3
Total      -      10