输入文件:
civil 4
posición 3
formación 7
posición 5
domingo 1
retrato 5
retrato 6
civil 6
formación 3
retrato 7
domingo 7
media 1
media 1
我希望输出为:
civil 4 domingo 1 formación 3 media 1 posición 3 retrato 5
civil 6 domingo 7 formación 7 media 1 posición 5 retrato 6
average# average# average# average# average# retrato 7
average#
所以我可以先用 sort -t"," 对原始输入排序,
得到:
civil 4
civil 6
domingo 1
domingo 7
formación 3
formación 7
media 1
media 1
posición 3
posición 5
retrato 5
retrato 6
retrato 7
然后用类似 awk '{x+=$insertcolumn} END { for (x> 0) print x/NR }' 这样的命令
可以得到平均值,但是在中间步骤中,如何得到上面那种按列排列的格式?
答案 0 :(得分:2)
$ cat tst.awk
# Lay out "name value" records as one column per distinct name (columns appear
# in order of first occurrence) and one row per occurrence. Directly below the
# last value of each column an "average" cell is printed with the mean of that
# column.
BEGIN {
    nameWidth = length("average")   # the name column must at least fit "average"
    valWidth = 1
}
{
    label = $1
    occurrence = ++count[label]
    if (occurrence == 1)
        order[++nLabels] = label    # remember the first-seen order of the names
    cell[label, occurrence] = $2

    # Track the widest name, the widest value and the deepest column.
    if (length(label) > nameWidth)
        nameWidth = length(label)
    if (length($2) > valWidth)
        valWidth = length($2)
    if (occurrence > maxDepth)
        maxDepth = occurrence
}
END {
    # One extra row so the deepest column also gets its "average" cell.
    for (row = 1; row <= maxDepth + 1; row++) {
        for (col = 1; col <= nLabels; col++) {
            label = order[col]
            if ((label, row) in cell) {
                shown = label
                datum = cell[label, row]
                total[label] += datum
            } else if (label in total) {
                # First row past the end of this column: emit its average once;
                # deleting the running total marks the column as finished.
                shown = "average"
                datum = total[label] / (row - 1)
                delete total[label]
            } else {
                shown = datum = ""
            }
            printf "%-*s %*s%s", nameWidth, shown, valWidth, datum, (col < nLabels ? OFS : ORS)
        }
    }
}
$ sort file | awk -f tst.awk
civil 4 domingo 1 formación 3 media 1 posición 3 retrato 5
civil 6 domingo 7 formación 7 media 1 posición 5 retrato 6
average 5 average 4 average 5 average 1 average 4 retrato 7
average 6
答案 1 :(得分:1)
假设您的输入是以逗号分隔的值:
# Pivot comma-separated "key,value" lines from inputFile into one column per
# key, then print a row with the average of every column.
# Requires gawk: a[key] is a subarray (arrays of arrays are a gawk extension).
# Cells are detected with the "in" operator and averages come from the stored
# counts, so a value of 0 is printed as data instead of being mistaken for a
# missing cell (which previously could end in a division by zero).
gawk -F, '
BEGIN { max = 0; maxl = 0 }
$2 != "" {
    x = $1
    a[x][0] += $2               # slot 0 holds the running sum
    l = length(a[x])            # next free slot for this key
    a[x][l] = $2
    if (l > max) max = l
    l2 = length($1)
    if (l2 > maxl) maxl = l2
}
END {
    n = maxl + 2
    # slot 0 is the sum, so the number of values is length - 1 (at least 1)
    for (j in a)
        b[j] = a[j][0] / (length(a[j]) - 1)
    for (i = 1; i <= max; i++) {
        for (j in a) {
            if (i in a[j])
                printf("%"n"s %2s", j, a[j][i])
            else
                printf("%"n"s %2s", "", "")
        }
        print ""
    }
    print ""
    for (j in a)
        printf("%"maxl"s %.2f", "avg", b[j])
    print ""
}' <inputFile
# Pivot "key,value" records (run with -F,) into one column per key, then print
# a final row holding the average of every column.
# Requires gawk: a[key] is a subarray (arrays of arrays are a gawk extension).
# Cells are detected with the "in" operator, so a value of 0 is shown as data
# rather than mistaken for a missing cell.
BEGIN {
    max = 0     # number of data rows to print (largest value count of any key)
    maxl = 0    # length of the longest key, used for the column width
}
$2 != "" {      # only records that actually carry a value
    x = $1
    a[x][0] += $2           # slot 0 accumulates the sum; also makes a[x] an array
    l = length(a[x])        # slot 0 plus the values stored so far = next free slot
    a[x][l] = $2            # append the new value
    if (l > max) max = l
    l2 = length($1)
    if (l2 > maxl) maxl = l2    # remember the longest key
}
END {
    n = maxl + 2            # pad the key column with two extra spaces
    # Compute the averages up front from the stored counts: slot 0 is the sum,
    # so the number of values is length(a[j]) - 1, which is always at least 1.
    for (j in a)
        b[j] = a[j][0] / (length(a[j]) - 1)
    for (i = 1; i <= max; i++) {    # slot 0 is the sum, so rows start at 1
        for (j in a) {
            if (i in a[j])
                printf("%"n"s %2s", j, a[j][i])     # show the cell
            else
                printf("%"n"s %2s", "", "")         # this key has no i-th value
        }
        print ""            # end of row
    }
    print ""                # blank separator line
    for (j in a)
        printf("%"maxl"s %.2f", "avg", b[j])        # averages row
    print ""                # terminate the averages row
}
civil,4
posición,3
formación,7
posición,5
domingo,1
retrato,5
retrato,6
civil,6
formación,3
retrato,7
domingo,7
media,1
media,1
domingo 1 posición 3 media 1 retrato 5 civil 4 formación 7
domingo 7 posición 5 media 1 retrato 6 civil 6 formación 3
retrato 7
avg 4.00 avg 4.00 avg 1.00 avg 6.00 avg 5.00 avg 5.00
POSIX awk 不能对数组使用 length(),因此我们把每个键的元素个数保存在另一个数组中。
# Excerpt of the gawk version: length() is applied to the subarray a[x] to
# find the slot for the new value, and max tracks the largest slot used.
l=length(a[x])
a[x][l]=$2
if (l > max) max=l
需要改为
# Replacement: keep a per-key counter in l[x] instead of calling length() on
# an array, and use it both as the slot for the new value and for max.
# NOTE(review): a[x][...] is still gawk "arrays of arrays" syntax; an awk
# without that extension would need a single array indexed as a[x, l[x]].
l[x]++
a[x][l[x]]=$2
if (l[x] > max) max=l[x]
# Portable awk version of the pivot: reads comma-separated "key,value" lines
# from inputFile, prints one column per key and then a row of averages.
# Uses only standard awk features: values live in a[key, index] (SUBSEP
# subscripts) instead of gawk-only arrays of arrays, s[key] holds the sum and
# l[key] the number of values. Cells are detected with the "in" operator, so
# a value of 0 is printed as data rather than treated as a missing cell.
awk -F, '
BEGIN { max = 0; maxl = 0 }
$2 != "" {
    x = $1
    s[x] += $2
    l[x]++
    a[x, l[x]] = $2
    if (l[x] > max) max = l[x]
    l2 = length($1)
    if (l2 > maxl) maxl = l2
}
END {
    n = maxl + 2
    for (i = 1; i <= max; i++) {
        for (j in l) {
            if ((j, i) in a)
                printf("%" n "s %2s", j, a[j, i])
            else
                printf("%" n "s %2s", "", "")
        }
        print ""
    }
    print ""
    for (j in l)
        printf("%" maxl "s %.2f", "avg", s[j] / l[j])
    print ""
}' <inputFile
(如果您的 awk 实际上是 gawk,请使用 gawk --posix)
留给读者的练习:
改写最后一个 for (...){print ...}
循环,使输出列可以按字母顺序排序。