Question

我想根据某一列的值，从多行记录中筛选出特定的条目。例如，在下面的示例中，我只想找出第4列中在第5列里 PD 条目最多的那个用户，并在第7列显示该用户的 PD 条目数量。示例输入：

column 4  column 5    column 7
abc           PD      8
xyz           PD      1
abc           PD      2
xyz           PD      7
xyz           PD      3
xyz           R       1

预期输出：

column 4  column 5    column 7
xyz           PD      3

我尝试使用 squeue 命令（我平时用它来查找作业的用户信息），并以 PD 条目最多为条件对特定的列进行筛选：

# NOTE(review): this sums column 7 per user (a[$4]+=$7) instead of counting rows; $5 inside END
# is whatever the last input record held (implementation-dependent); and sort -k 3,3 without -n
# compares the totals as text rather than as numbers.
squeue | awk '($5 == "PD")'| awk '{a[$4]+=$7} END{for(i in a) print i,$5,a[i]}'| sort -r -k 3,3| head -n1


# NOTE(review): uniq has no -r or -k options (those are sort options), so the uniq stage is
# expected to reject its arguments -- confirm against the local uniq manual page.
squeue | awk '($5 == "PD")'| uniq -r -k 3,3 | head -n1

我没有得到所需的答案。

Answer 1

请尝试以下方法，并告诉我它是否对您有帮助。

# Print the header, count the PD rows per (column 1, column 2) pair, then print the pair with the highest count followed by that count.
awk 'FNR==1{print;next} $2=="PD"{a[$1,$2]++} END{for(i in a){if(a[i]>val){val=a[i];key=i}};if(key!="")print key"\t"val}' SUBSEP="\t"  Input_file

输出如下。

column 4  column 5    column 7
xyz     PD      3

说明：下面再给出该解决方案的非单行（多行）形式，并附上解释：

awk '
FNR==1{                                ## First line of the input file: this is the header.
 print;                                ## Echo the header unchanged.
 next                                  ## Skip the remaining rules for this line.
}
$2=="PD"{                              ## Only rows whose column 2 is PD take part in the count.
 a[$1,$2]++                            ## Count the rows per (column 1, column 2) pair; the two fields are joined with SUBSEP in the array index.
}
END{
 for(i in a){                          ## Walk over every counted pair.
   if(a[i]>val){                       ## This pair has more rows than the best one seen so far.
     val=a[i];                         ## Remember the new highest count.
     key=i                             ## Remember which pair it belongs to.
   }
 }
 if(key!=""){                          ## Print nothing when the input has no PD rows.
   print key"\t"val                    ## Print the winning pair (already TAB-joined via SUBSEP), a TAB, and its count.
 }
}
' SUBSEP="\t" Input_file               ## SUBSEP is set to TAB before Input_file is read, so the index prints as "column1<TAB>column2".

Answer 2

# Print the header, count the PD rows per user (column 1) while tracking the user with the highest count, then print that user, "PD" and the count, TAB-separated.
awk -v OFS="\t" 'FNR==1{print;next}$2=="PD"{a[$1]++;if(a[$1]>max){max=a[$1];ind=$1}}END{print ind,"PD",a[ind]}' infile

为了更好的可读性，写成多行形式：

awk -v OFS="\t" '
  # Header line: echo it unchanged and move on to the next record.
  FNR == 1 { print; next }

  # PD rows: bump the per-user counter and remember the user with the
  # highest count seen so far.
  $2 == "PD" {
    if (++count[$1] > best) {
      best = count[$1]
      user = $1
    }
  }

  # After the last record: report the winning user, the literal PD and the count.
  END { print user, "PD", count[user] }
' infile

或

使用GNU awk：

awk -v OFS="\t" '
  BEGIN {
    # GNU awk only: make "for (... in ...)" visit elements by numeric value, largest first.
    PROCINFO["sorted_in"] = "@val_num_desc"
  }
  FNR == 1 { print; next }      # header line passes through unchanged
  $2 == "PD" { count[$1]++ }    # number of PD rows per user
  END {
    # With the descending order set above, the first element visited has the highest count.
    for (user in count) {
      print user, "PD", count[user]
      break
    }
  }
' infile

Answer 3

awk 解决方案：

$ awk 'BEGIN{OFS="\t"}
       NR>1 && $2=="PD"{ a[$1]++ }                       # count the PD rows per user
       END{for(i in a) if(max=="" || a[i]>a[max]) max=i  # user with the most PD rows
           if(max!="") print max, "PD", a[max]}' file
xyz PD  3

如果你想添加整齐格式的标题：

$ cat tst.awk
# tst.awk - print the header, then the user with the most PD rows and that row count.
BEGIN{FS="[[:space:]][[:space:]]+";OFS="\t"}   # fields are separated by runs of two or more whitespace characters
NR==1{ for(h=1;h<=NF;h++) printf "%s%s%s", (h>1?OFS:""),$h,(h==NF?"\n":"")   # re-join the header fields with OFS
       l=length($NF)                            # width of the last header field, reused as the padding width
       next
}
$2=="PD"{ a[$1]++ }                             # count the PD rows per user
END{ for(i in a) if(max=="" || a[i]>a[max]) max=i   # never index a[] with an unset max while iterating
     if(max!="") print pr(max,l), pr("PD",l), pr(a[max],l)
}
# pr(v,w): return v left-justified and space-padded to width w.
function pr(v,w){ return sprintf("%-*s",w,v) }

给出：

$ awk -f tst.awk file
column 4    column 5    column 7
xyz         PD          3

Answer 4

另一个awk

$ awk -v k="PD" -v OFS='\t' 'NR==1{print;next}
                             $2==k{ if(++a[$1]>max){max=a[$1]; u=$1} }   # count rows per user, track the highest count
                             END  {if(u!="") print u, k, max}' file
column 4  column 5  column 7
xyz   PD 3

识别linux中列中的各个条目

4 个答案: