我有一个包含10列的文本文件,如f.txt,如下所示:
aab abb 263-455
aab abb 263-455
aab abb 263-455
bbb abb 26-455
bbb abb 26-455
bbb aka 264-266
bga bga 230-232
bga bga 230-232
我想统计第一列和第二列中的每个字符串各对应多少个不同的第三列取值(同一字符串与同一个第三列值的组合只计一次,第一列和第二列分别统计)。
输出:
aab - 1
abb - 2
bbb - 2
aka - 1
bga - 2
Total no - 8
答案 0 :(得分:3)
这样可以解决问题:
$ awk '
# a[] remembers every (column, string, column-3) triple already counted,
# so rows that differ only in other columns cannot inflate a count.
!a[1,$1,$3]++{c[$1]++}
!a[2,$2,$3]++{c[$2]++}
# s+0 prints 0 instead of an empty total when the input has no rows.
END{for(k in c){print k" - "c[k];s+=c[k]}print "\nTotal No -",s+0}' file
aka - 1
bga - 2
aab - 1
abb - 2
bbb - 2
Total No - 8
以更易读的脚本形式:
# For every string found in column 1 or column 2, count how many distinct
# column-3 values it is paired with. Each column is tracked separately, so
# a string that sits in both columns of one row is counted once per column.
#
# seen[] is keyed by (column number, string, column-3 value). Keying on
# these three parts instead of the whole record means rows that differ
# only in other columns are not counted again.
!seen[1, $1, $3]++ {
    count[$1]++
}
!seen[2, $2, $3]++ {
    count[$2]++
}
END {
    for (name in count) {
        print name" - "count[name]
        sum += count[name]
    }
    # sum + 0 prints 0 rather than an empty total when there was no input.
    print "\nTotal No -", sum + 0
}
要以这种形式运行,请将其保存为文件 script.awk,然后执行:
$ awk -f script.awk file
aka - 1
bga - 2
aab - 1
abb - 2
bbb - 2
Total No - 8
答案 1 :(得分:3)
awk '
# s[] records every (column, string, column-3) triple already counted.
# Comma subscripts join the parts with SUBSEP, so fields that themselves
# contain ":" cannot run together into the same key.
!s[1,$1,$3]++{sU[$1]++;tot++}
!s[2,$2,$3]++{sU[$2]++;tot++}
END{
for (x in sU) print x, sU[x];
# tot+0 prints 0 instead of an empty total when the input has no rows.
print "Total No -",tot+0;
}' input
**输出**
aab 1
bbb 2
aka 1
bga 2
abb 2
Total No - 8
答案 2 :(得分:2)
# For each string in column 1 or column 2, count the distinct column-3
# values it appears with (tracked per column), then print every
# "string count" pair followed by the grand total.
awk '
  # First time this (column-1 string, column-3) pair is seen.
  !(($1, $3) in col1_seen) { col1_seen[$1, $3] = 1; tally[$1]++ }
  # First time this (column-2 string, column-3) pair is seen.
  !(($2, $3) in col2_seen) { col2_seen[$2, $3] = 1; tally[$2]++ }
  END {
    for (name in tally) {
      print name, tally[name]
      grand += tally[name]
    }
    print "Total -", grand
  }
' file
答案 3 :(得分:1)
这是一个有点长的命令,但它很容易理解:
# Stage 1 (gawk): collect each distinct (column-3, string, column-number)
#   triple as an array key and print the keys; the parts of a key are
#   joined by awk's SUBSEP character, which defaults to 0x1c.
# Stage 2 (cut): keep only the string, i.e. the second 0x1c-separated part.
# Stage 3 (sort | uniq -c): count how often each string occurs.
# Stage 4 (awk): print "string - count" lines and the grand total.
gawk '
  { seen[$3, $1, 1]; seen[$3, $2, 2] }
  END { for (key in seen) print key }
' input \
  | cut -d $'\x1c' -f 2 \
  | sort \
  | uniq -c \
  | awk -v OFS=' - ' '
      { total += $1; print $2, $1 }
      END { print "\nTotal No", total }
    '
aab - 1
abb - 2
aka - 1
bbb - 2
bga - 2
Total No - 8
答案 4 :(得分:0)
# For every string in column 1 or column 2, count the distinct column-3
# values it is paired with, then print each "string count" pair and the
# grand total.
#
# seen[] is keyed by (column number, string, column-3 value). Including the
# column number keeps the two columns independent, so a string that sits in
# both columns of one row (such as "bga bga 230-232") is counted once for
# each column. Plain comma subscripts are used instead of arrays of arrays,
# so the script does not depend on gawk.
{
    if (!((1, $1, $3) in seen)) {
        seen[1, $1, $3] = 1
        total[$1]++
    }
    if (!((2, $2, $3) in seen)) {
        seen[2, $2, $3] = 1
        total[$2]++
    }
}
END {
    for (item in total) {
        print item, total[item]
        totalCount += total[item]
    }
    # totalCount + 0 prints 0 rather than an empty total for empty input.
    print "\nTotal no -", totalCount + 0
}
输出:
aka 1
bga 2
aab 1
abb 2
bbb 2
Total no - 8