问题是:我有一个包含 N 行的列表,如下所示:
4.96035894 2.94014535 9.71651378 On
8.37470259 9.08139103 10.23145322 Off
5.73085411 4.21656546 9.98718707 On
6.40892867 9.44195654 8.83707549 On
4.26065784 3.74966832 7.89520829 On
8.89601431 9.84208918 9.63054539 On
9.10538764 8.58408119 10.87454882 On
6.21494725 4.61164407 9.08378204 Off
7.62256424 9.59449339 10.84506558 Off
6.49210768 4.03768151 10.75221925 Off
5.04079861 4.99362253 10.34349177 Off
...
目标是找到第三个字段值最低的 X 行(X < N),并将这些行的状态改为 Off。在上面的示例中,如果我想把第三个值最低的 3 行改为 Off,输出将如下方列表所示。我认为我可以处理 X = 1(即值最低的那一行)的特殊情况,但不知道如何扩展到任意 X。也许可以用一个大小为 X 的数组,在遍历列表的过程中填充并更新它?
4.96035894 2.94014535 9.71651378 On
8.37470259 9.08139103 10.23145322 Off
5.73085411 4.21656546 9.98718707 On
6.40892867 9.44195654 8.83707549 Off // this row is changed
4.26065784 3.74966832 7.89520829 Off // this row is changed
8.89601431 9.84208918 9.63054539 On
9.10538764 8.58408119 10.87454882 On
6.21494725 4.61164407 9.08378204 Off // this row is changed
7.62256424 9.59449339 10.84506558 Off
6.49210768 4.03768151 10.75221925 Off
5.04079861 4.99362253 10.34349177 Off
...
答案 0 :(得分:3)
这样的事情会起作用:
# Turn "On" into "Off" on the x rows of "file" whose field f is numerically
# lowest, and print every row in its original order.
# Assumes every row has at least f whitespace-separated fields.
x=3   # how many of the lowest rows to mark
f=3   # 1-based index of the field to rank by

# Decorate-sort-undecorate: prefix each row with "<key> <row number> ", rank
# by key, edit the first x rows, sort back by row number, strip the prefix.
# - LC_ALL=C makes sort read "." as the decimal point in every locale.
# - The second sort key makes rows with equal values keep their input order,
#   so which of several tied rows gets marked is deterministic.
# - The final regex is anchored and removes exactly the two added columns,
#   so leading whitespace of the original row survives.
awk -v f="$f" '{print $f, NR, $0}' file |
  LC_ALL=C sort -k1,1n -k2,2n |
  awk -v x="$x" 'NR<=x{sub(/On/,"Off")} {print}' |
  LC_ALL=C sort -k2,2n |
  awk '{sub(/^[^ ]+ [^ ]+ /,""); print}'
f 是用于排序的字段编号,x 是要标记的最小值的个数。
你也可以用插入排序,或者 gawk 内置的排序函数 asort()/asorti(),完全在 awk 中完成;但上面的做法很简单,而我又比较懒……
$ x=3; f=3; awk -v f="$f" '{print $f, NR, $0}' file | sort -n | awk -v x="$x" 'NR<=x{sub(/On/,"Off")} {print}' | sort -k2n | awk '{sub(/[^ ]+ +[^ ]+ +/,""); print}'
4.96035894 2.94014535 9.71651378 On
8.37470259 9.08139103 10.23145322 Off
5.73085411 4.21656546 9.98718707 On
6.40892867 9.44195654 8.83707549 Off
4.26065784 3.74966832 7.89520829 Off
8.89601431 9.84208918 9.63054539 On
9.10538764 8.58408119 10.87454882 On
6.21494725 4.61164407 9.08378204 Off
7.62256424 9.59449339 10.84506558 Off
6.49210768 4.03768151 10.75221925 Off
5.04079861 4.99362253 10.34349177 Off
$ x=4; f=2; awk -v f="$f" '{print $f, NR, $0}' file | sort -n | awk -v x="$x" 'NR<=x{sub(/On/,"Off")} {print}' | sort -k2n | awk '{sub(/[^ ]+ +[^ ]+ +/,""); print}'
4.96035894 2.94014535 9.71651378 Off
8.37470259 9.08139103 10.23145322 Off
5.73085411 4.21656546 9.98718707 Off
6.40892867 9.44195654 8.83707549 On
4.26065784 3.74966832 7.89520829 Off
8.89601431 9.84208918 9.63054539 On
9.10538764 8.58408119 10.87454882 On
6.21494725 4.61164407 9.08378204 Off
7.62256424 9.59449339 10.84506558 Off
6.49210768 4.03768151 10.75221925 Off
5.04079861 4.99362253 10.34349177 Off
答案 1 :(得分:3)
有趣的问题,需要巧妙地使用数组:
# Switch "On" to "Off" on the x input lines whose chosen field is lowest,
# then print all lines in their original order.
#
# Variables (set with -v):
#   x      how many of the lowest lines to switch off (default 3)
#   field  1-based index of the numeric field to rank by (default 3)
#
# Lines are selected by line number, not by content or by value, so exactly
# x lines are switched even when several lines share the same value; ties at
# the cutoff go to the lines that appear first in the input.
# Requires GNU awk for asort().
BEGIN {
    if (!x)                 # If x wasn't set using -v default is 3
        x = 3
    if (!field)             # If field wasn't set using -v default is 3
        field = 3
}
{
    lines[NR] = $0          # Store each line in an array
    value[NR] = $field + 0  # Store the field, forced to a number
}
END {
    if (NR == 0)            # No input: nothing to rank or print
        exit
    total = asort(value, ranked)   # ranked[1..total] ascending; value[] kept
    x = int(x)
    if (x > total)
        x = total
    if (x >= 1) {
        cutoff = ranked[x]         # The x-th lowest value
        # Count how many of the lowest x values equal the cutoff: that many
        # lines holding exactly the cutoff value may still be switched.
        for (i = x; i >= 1 && ranked[i] == cutoff; i--)
            quota++
    }
    for (j = 1; j <= NR; j++) {    # For every line in the file
        v = value[j]
        if (x >= 1 && (v < cutoff || (v == cutoff && quota-- > 0)))
            sub(/On/, "Off", lines[j])   # Do the substitution
        print lines[j]             # Print the line
    }
}
将其保存到 script.awk 之类的文件中,然后按如下方式运行:
$ awk -f script.awk file
4.96035894 2.94014535 9.71651378 On
8.37470259 9.08139103 10.23145322 Off
5.73085411 4.21656546 9.98718707 On
6.40892867 9.44195654 8.83707549 Off
4.26065784 3.74966832 7.89520829 Off
8.89601431 9.84208918 9.63054539 On
9.10538764 8.58408119 10.87454882 On
6.21494725 4.61164407 9.08378204 Off
7.62256424 9.59449339 10.84506558 Off
6.49210768 4.03768151 10.75221925 Off
5.04079861 4.99362253 10.34349177 Off
默认情况下,它会把字段 3 中最低的 3 个值所在的行改为 Off,但你可以用 -v 选项指定字段和值的个数。例如,把字段 3 中最低的 10 个值所在的行改为 Off,只让最大值所在的行保持 On:
$ awk -v x=10 -f script.awk file
4.96035894 2.94014535 9.71651378 Off
8.37470259 9.08139103 10.23145322 Off
5.73085411 4.21656546 9.98718707 Off
6.40892867 9.44195654 8.83707549 Off
4.26065784 3.74966832 7.89520829 Off
8.89601431 9.84208918 9.63054539 Off
9.10538764 8.58408119 10.87454882 On
6.21494725 4.61164407 9.08378204 Off
7.62256424 9.59449339 10.84506558 Off
6.49210768 4.03768151 10.75221925 Off
5.04079861 4.99362253 10.34349177 Off
对字段 2 做同样的处理,只让最大值所在的行保持 On:
$ awk -v x=10 -v field=2 -f script.awk file
4.96035894 2.94014535 9.71651378 Off
8.37470259 9.08139103 10.23145322 Off
5.73085411 4.21656546 9.98718707 Off
6.40892867 9.44195654 8.83707549 Off
4.26065784 3.74966832 7.89520829 Off
8.89601431 9.84208918 9.63054539 On
9.10538764 8.58408119 10.87454882 Off
6.21494725 4.61164407 9.08378204 Off
7.62256424 9.59449339 10.84506558 Off
6.49210768 4.03768151 10.75221925 Off
5.04079861 4.99362253 10.34349177 Off
注意:asort() 函数需要 GNU awk。
答案 2 :(得分:1)
和另一种方法:
# Rank the rows of "file" by one numeric column and overwrite the last field
# of the n lowest-ranked rows; every row is printed in its original order.
n=4          # number of rows to rewrite
field=3      # 1-based column used for ranking
newval=FOO   # text written into the last field of the chosen rows

# Collect the line numbers of the n lowest rows into the positional
# parameters.  cat -n puts the line number in front of every row, so the
# ranking column sits one position further right in the numbered stream.
# The command substitution is left unquoted on purpose: word splitting turns
# each printed line number into its own positional parameter.
set -- $(
  cat -n file |
    sort -n -k "$((field + 1)),$((field + 1))" |
    awk -v n="$n" 'FNR <= n { print $1 }'
)

# Rewrite the chosen rows.  "lines" is the space-joined list of line numbers
# padded with a space at both ends, so each number is only matched whole.
# Assigning to $NF makes awk rebuild the row with single-space separators.
awk -v val="$newval" -v lines=" $* " \
  'index(lines, " " FNR " ") { $NF = val } { print }' file
答案 3 :(得分:1)
另一种方法:读取文件两次,边读边排序。
# Two passes over the same file: pass 1 finds the n rows whose "field" column
# is numerically lowest, pass 2 prints every row with the last field of those
# n rows replaced by "Off".  n and field are set by the assignments on the
# awk command line, ahead of the two file operands.
awk '
# Pass 1 (NR==FNR holds while the first file operand is being read).
# S[1..cnt] holds the lowest values seen so far in ascending order and
# L[1..cnt] the row numbers they came from; at most n entries are kept.
# Comparisons are strict, so among equal values the earlier row wins.
NR==FNR{
  if(n<1) next
  v=$field+0
  if(cnt<n) i=++cnt            # still room: open a new slot at the end
  else if(v<S[n]) i=n          # beats the current largest: take its slot
  else next                    # not among the n lowest
  # sort the value into place
  while(i>1 && S[i-1]>v){
    S[i]=S[i-1]
    L[i]=L[i-1]
    i--
  }
  S[i]=v
  L[i]=FNR
  next
}
# Create associative array entries for the selected row numbers
FNR==1 {
  for(i=1;i<=cnt;i++){
    A[L[i]]
  }
}
# if this row was selected then change the last field; the regex is anchored
# at the end of the row so no other field can be hit (trailing blanks of a
# changed row are dropped)
(FNR in A) {
  sub(/[^ \t]+[ \t]*$/,"Off")
}
1
' n=3 field=3 file file