如何使用awk在多列.tsv文件中按行第一列值聚合(求和)行?

时间:2017-03-02 14:09:12

标签: bash awk sum aggregate

已有一些示例,但它们处理的文件都只有一列需要聚合,例如:

# Sum column 2 per distinct value of column 1 (output order follows awk's for-in order).
awk '{ total[$1] += $2 } END { for (key in total) print key, total[key] }' filename

如何对包含数百列的文件执行相同的操作?输出也应该是数百列。

输入

a 1 2 4 1 ...
b 1 2 1 2 ...
b 2 1 1 1 ...

输出

a 1 2 4 1 ...
b 3 3 2 3 ...

2 个答案:

答案 0 :(得分:2)

使用 awk,如下所示:

# Sum columns 2..N of ./file per distinct value of column 1 and print one row
# per key, in the order each key first appears.
# Fields are split on runs of blanks/tabs and joined with OFS (a single space
# by default); for strictly tab-delimited data add: -F'\t' -v OFS='\t'
awk '
NF == 0 { next }                        # ignore blank lines

{
    if (!($1 in seen)) {                # first time this key shows up
        seen[$1]
        order[++nkeys] = $1             # remember first-seen order for output
    }
    if (NF > width) width = NF          # widest record, so ragged rows keep every column

    for (i = 2; i <= NF; i++)           # fields 2..NF of the current record
        sum[$1, i] += $i                # index is key SUBSEP column; += accumulates per key
}

END {
    for (k = 1; k <= nkeys; k++) {      # deterministic order (for-in order is unspecified)
        key = order[k]
        printf "%s", key                # column 1: the key itself
        for (j = 2; j <= width; j++)
            printf "%s%s", OFS, sum[key, j] + 0   # + 0 turns a never-seen cell into 0
        print ""                        # end the row
    }
}' file

**单行写法(one-liner)**

# One-line form: per-key column sums for ./file, keys in first-seen order, blank lines skipped, missing cells printed as 0.
awk 'NF{ if(!($1 in seen)){ seen[$1]; order[++n]=$1 } if(NF>w) w=NF; for(i=2;i<=NF;i++) sum[$1,i]+=$i } END{ for(k=1;k<=n;k++){ key=order[k]; printf "%s",key; for(j=2;j<=w;j++) printf "%s%s",OFS,sum[key,j]+0; print "" } }' file

**输入**

akshay@db-3325:/tmp$ cat file
a 1 2 4 1
b 1 2 1 2
b 2 1 1 1

**输出**

akshay@db-3325:/tmp$ awk '{ 
       b[$1]; 
       for(i=2;i<=NF;i++)
       a[$1,i]+=$i 
     }
 END{
      for( i in b)
      {
          printf("%s",i)
          for(j=2;j<=NF;j++)
          {
              printf("%s%s",OFS,a[i,j])
          } 
          print "";
      } 
    }' file
a 1 2 4 1
b 3 3 2 3

如果文件已按第一个字段排序,则可以使用:

# Streaming variant for input that is already sorted (grouped) by column 1:
# only the current group is held in memory, and each group is printed as soon
# as the key changes.
awk '
# Print the finished group (key, then one sum per column) and reset the state.
function print_and_clear(    j) {       # j is a local loop counter
    printf "%s", key
    for (j = 2; j <= width; j++)
        printf "%s%s", OFS, sum[j] + 0  # + 0 turns a never-seen cell into 0
    print ""
    split("", sum)                      # portable idiom for emptying an array
    width = 0
}

NF == 0 { next }                        # ignore blank lines

!started || ($1 "") != key {            # first record, or the key changed
    if (started)
        print_and_clear()               # flush the previous group
    key = $1 ""                         # "" forces a string, so 1 and 1.0 stay distinct keys
    started = 1
}

{
    if (NF > width) width = NF          # widest record of the current group
    for (i = 2; i <= NF; i++)
        sum[i] += $i
}

END {
    if (started)
        print_and_clear()               # flush the last group
}
' file

答案 1 :(得分:0)

当文件已按第一个字段排序时,使用 awk:

file