我想根据列值对多行数据进行汇总显示。例如,在下面的示例中,我只想得到第5列中PD条目最多的那个用户(第4列),并在第7列中显示该用户的PD条目数。 示例输入:
column 4 column 5 column 7
abc PD 8
xyz PD 1
abc PD 2
xyz PD 7
xyz PD 3
xyz R 1
预期输出:
column 4 column 5 column 7
xyz PD 3
我尝试使用squeue命令来实现:以PD条目数最多为标准,对特定列进行筛选(subsetting),找出对应的作业用户信息。
squeue | awk '($5 == "PD")'| awk '{a[$4]+=$7} END{for(i in a) print i,$5,a[i]}'| sort -r -k 3,3| head -n1
squeue | awk '($5 == "PD")'| uniq -r -k 3,3 | head -n1
我没有得到所需的答案。
答案 0 :(得分:0)
请您试试下面的命令,并告诉我它是否对您有帮助。
# Print the header line, count how often each (column 1, column 2) pair occurs,
# then print the pair with the highest count followed by a TAB and that count.
# NOTE(review): every pair is counted, not only rows whose column 2 is PD.
awk 'FNR==1{print;next}{a[$1,$2]++} END{for(i in a){b[a[i]]=i;val=val>a[i]?(val?val:a[i]):a[i]};print b[val]"\t"val}' SUBSEP="\t" Input_file
输出如下。
column 4 column 5 column 7
xyz PD 3
说明:下面给出同一解决方案的非单行(多行)形式,并附上注释说明:
awk '
FNR==1{ ##FNR==1 is true only while the very first line (the header) of Input_file is being read.
print; ##Print the header line to standard output unchanged.
next ##next skips all remaining rules for this line.
}
{
a[$1,$2]++ ##Array a is indexed by column 1 and column 2 joined with SUBSEP; each row increments the count of its (column 1, column 2) pair. NOTE(review): every pair is counted, not only rows whose column 2 is PD.
}
END{
for(i in a){ ##Traverse every index of array a.
b[a[i]]=i; ##Array b maps a count back to an index of a that has this count (on a tie, the index visited last wins).
val=val>a[i]?(val?val:a[i]):a[i]}; ##Keep the largest count seen so far in val, so that val ends up as the MAX count.
print b[val]"\t"val ##Print the index stored for the max count, then a TAB, then the max count itself.
}
' SUBSEP="\t" Input_file ##Set SUBSEP to TAB (so the joined index prints as tab-separated columns) and name Input_file, the same placeholder the one-liner uses.
答案 1 :(得分:0)
# Print the header, then the user with the most PD rows and that row count.
awk -v OFS="\t" 'FNR==1{print;next} $2=="PD"{if(++n[$1]>top){top=n[$1];who=$1}} END{print who,"PD",n[who]}' infile
可读性更好的写法:
awk -v OFS="\t" '
# Echo the header line unchanged and move on to the next record.
FNR == 1 { print; next }

# Tally PD rows per user and remember whoever holds the highest tally so far.
$2 == "PD" {
  if (++tally[$1] > best) {
    best = tally[$1]
    winner = $1
  }
}

# Report the winning user, the literal state PD, and the number of PD rows.
END { print winner, "PD", tally[winner] }
' infile
或者,使用GNU awk:
awk -v OFS="\t" '
# GNU awk only: make every for-in loop visit elements by value, largest number first.
BEGIN { PROCINFO["sorted_in"] = "@val_num_desc" }

# Echo the header line unchanged.
FNR == 1 { print; next }

# Tally PD rows per user.
$2 == "PD" { ++tally[$1] }

# Thanks to the traversal order, the first element visited has the highest tally;
# print it and stop.
END {
  for (user in tally) {
    print user, "PD", tally[user]
    break
  }
}
' infile
答案 2 :(得分:0)
awk 解决方案:
$ awk 'BEGIN{OFS="\t"}
       # Count the PD rows of each user (data starts on line 2, after the header).
       NR>1 && $2=="PD"{ cnt[$1]++ }
       # Pick the user with the highest count and print that count as column 3
       # (not the last column-3 value that happened to be read for the user).
       END{ for(u in cnt) if(cnt[u]>best){ best=cnt[u]; top=u }
            print top, "PD", best }' file
xyz PD 3
如果你想添加整齐格式的标题:
$ cat tst.awk
# tst.awk - print the header, then the user with the most PD rows and that row count.
# NOTE(review): FS requires at least two whitespace characters between columns
# (the header names themselves contain single spaces) - confirm the data rows
# are separated the same way.
BEGIN{ FS="[[:space:]][[:space:]]+"; OFS="\t" }

# Header: re-emit the fields joined by OFS and remember the width of the last
# header field, which pr() uses as the column width for the result row.
NR==1{ for(h=1;h<=NF;h++) printf "%s%s%s", (h>1?OFS:""), $h, (h==NF?"\n":"")
       l=length($NF)
       next
}

# Count the PD rows of each user.
$2=="PD"{ cnt[$1]++ }

# Select the user with the highest count; the count itself is the third column
# (not the last column-3 value read for that user).
END{ for(u in cnt) if(cnt[u]>best){ best=cnt[u]; top=u }
     print pr(top,l), pr("PD",l), pr(best,l)
}

# pr(v, w): return v left-justified and space-padded to w characters, always
# followed by at least one space. Uses the portable "function" keyword and a
# plain %-*s conversion (the 0 flag is only defined for numeric conversions).
function pr(v,w){ return sprintf("%-*s ", w-1, v) }
给出:
$ awk -f tst.awk file
column 4 column 5 column 7
xyz PD 3
答案 3 :(得分:0)
另一个awk
$ awk -v k="PD" -v OFS='\t' 'NR==1{print;next}
       # Count rows whose column 2 equals k and remember the user with the
       # highest count so far. (asorti() would sort the user names, not the
       # counts, and so pick the alphabetically last user.)
       $2==k{ if(++a[$1]>max){ max=a[$1]; top=$1 } }
       END {print top, k, max}' file
column 4 column 5 column 7
xyz PD 3