我有awk命令用|
sperator读取csv文件。我使用此命令作为我的shell脚本的一部分,其中要排除的列将从输出中删除。列的列表输入为1 2 3
命令参考:http://wiki.bash-hackers.org/snipplets/awkcsv
awk -v FS='"| "|^"|"$' '{for i in $test; do $(echo $i=""); done print }' test.csv
$test is 1 2 3
我想在$1="" $2="" $3=""
所有列前打印print
。我收到此错误
awk: {for i in $test; do $(echo $i=""); done {print }
awk: ^ syntax error
此命令正常工作,可打印所有列
awk -v FS='"| "|^"|"$' '{print }' test.csv
档案1
"first"| "second"| "last"
"fir|st"| "second"| "last"
"firtst one"| "sec|ond field"| "final|ly"
如果我想动态排除第2列和第3列
,则预期输出first
fir|st
firtst one
我需要帮助才能正确保持for循环。
答案 0 :(得分:3)
使用GNU awk for FPAT:
$ awk -v FPAT='"[^"]+"' '{print $1}' file
"first"
"fir|st"
"firtst one"
$ awk -v flds='1' -v FPAT='"[^"]+"' 'BEGIN{n=split(flds,f,/ /)} {for (i=1;i<=n;i++) printf "%s%s", $(f[i]), (i<n?OFS:ORS)}' file
"first"
"fir|st"
"firtst one"
$ awk -v flds='2 3' -v FPAT='"[^"]+"' 'BEGIN{n=split(flds,f,/ /)} {for (i=1;i<=n;i++) printf "%s%s", $(f[i]), (i<n?OFS:ORS)}' file
"second" "last"
"second" "last"
"sec|ond field" "final|ly"
$ awk -v flds='3 1' -v FPAT='"[^"]+"' 'BEGIN{n=split(flds,f,/ /)} {for (i=1;i<=n;i++) printf "%s%s", $(f[i]), (i<n?OFS:ORS)}' file
"last" "first"
"last" "fir|st"
"final|ly" "firtst one"
如果您不希望输出字段以空白字符分隔,请将OFS
设置为-v OFS='whatever'
所需的任何内容。如果你想摆脱周围的引号你可以使用gensub()(因为我们无论如何都使用gawk)或者每个字段使用substr(),例如:
$ awk -v OFS=';' -v flds='1 3' -v FPAT='"[^"]+"' 'BEGIN{n=split(flds,f,/ /)} {for (i=1;i<=n;i++) printf "%s%s", substr($(f[i]),2,length($(f[i]))-2), (i<n?OFS:ORS)}' file
first;last
fir|st;last
firtst one;final|ly
$ awk -v OFS=';' -v flds='1 3' -v FPAT='"[^"]+"' 'BEGIN{n=split(flds,f,/ /)} {for (i=1;i<=n;i++) printf "%s%s", gensub(/"/,"","g",$(f[i])), (i<n?OFS:ORS)}' file
first;last
fir|st;last
firtst one;final|ly
答案 1 :(得分:2)
在GNU awk中(对于FPAT
):
$ test="2 3" # fields to exclude in bash var $test
$ awk -v t="$test" ' # taken to awk var t
BEGIN { # first
FPAT="([^|]+)|( *\"[^\"]+\")" # instead of FS, use FPAT
split(t,a," ") # process t to e:
for(i in a) # a[1]=2 -> e[2], etc.
e[a[i]]
}
{
for(i=1;i<=NF;i++) # for each field
if((i in e)==0) { # if field # not in e
gsub(/^\"|\"$/,"",$i) # remove leading and trailing "
b=b (b==""?"":OFS) $i # put to buffer b
}
print b; b="" # putput and reset buffer
}' file
first
fir|st
firtst one
FPAT
用作FS
无法处理引号中的分隔符。
答案 2 :(得分:1)
Vikram,如果您的实际Input_file与显示的示例Input_file的DITTO相同,那么以下内容可能对您有所帮助。我也将在这里简单地添加解释(使用GNU awk 3.1.7对旧版本的awk进行测试)。
awk -v num="2,3" 'BEGIN{
len=split(num, val,",")
}
{while($0){
match($0,/.[^"]*/);
if(substr($0,RSTART,RLENGTH+1) && substr($0,RSTART,RLENGTH+1) !~ /\"\| \"/ && substr($0,RSTART,RLENGTH+1) !~ /^\"$/ && substr($0,RSTART,RLENGTH+1) !~ /^\" \"$/){
array[++i]=substr($0,RSTART,RLENGTH+1)
};
$0=substr($0,RLENGTH+1);
};
for(l=1;l<=len;l++){
delete array[val[l]]
};
for(j=1;j<=length(array);j++){
if(array[j]){
gsub(/^\"|\"$/,"",array[j]);
printf("%s%s",array[j],j==length(array)?"":" ")
}
};
print "";
i="";
delete array
}' Input_file
EDIT1:此处也添加了解释代码。
awk -v num="2,3" 'BEGIN{ ##creating a variable named num whose value is comma seprated values of fields which you want to nullify, starting BEGIN section here.
len=split(num, val,",") ##creating an array named val here whose delimiter is comma and creating len variable whose value is length of array val here.
}
{while($0){ ##Starting a while loop here which will run for a single line till that line is NOT getting null.
match($0,/.[^"]*/);##using match functionality which will look for matches from starting to till a " comes into match.
if(substr($0,RSTART,RLENGTH+1) && substr($0,RSTART,RLENGTH+1) !~ /\"\| \"/ && substr($0,RSTART,RLENGTH+1) !~ /^\"$/ && substr($0,RSTART,RLENGTH+1) !~ /^\" \"$/){##So RSTATR and RLENGTH are the variables which will be set when a regex is having a match in line/variable passed into match function. In this if condition I am checking 1st: value of substring of RSTART,RLENGTH+1 should not be NULL. 2nd: Then checking this substring should not be having " pipe space ". 3rd condition: Checking if substring is NOT equal to a string which starts from " and ending with it. 4th condition: Checking here if substring is NOT equal to ^" space "$, if all conditions are TRUE then do following actions.
array[++i]=substr($0,RSTART,RLENGTH+1) ##creating an array named array whose index is variable i with increasing value of i and its value is substring of RSTART to till RLENGTH+1.
};
$0=substr($0,RLENGTH+1);##Now removing the matched part from current line which will decrease the length of line and avoid the while loop to become as infinite.
};
for(l=1;l<=len;l++){##Starting a loop here once while above loop is done which runs from starting of variable l=1 to value of len.
delete array[val[l]] ##Deleting here those values which we want to REMOVE from OPs request, so removing here.
};
for(j=1;j<=length(array);j++){##Start a for loop from the value of j=1 till the value of lengthh of array.
if(array[j]){ ##Now making sure array value whose index is j is NOT NULL, if yes then perform following statements.
gsub(/^\"|\"$/,"",array[j]); ##Globally substituting starting " and ending " with NULL in value of array value.
printf("%s%s",array[j],j==length(array)?"":" ") ##Now printing the value of array and secondly printing space or null depending upon if j value is equal to array length then print NULL else print space. It is because we don not want space at the last of the line.
}
};
print ""; ##Because above printf will NOT print a new line, so printing a new line.
i=""; ##Nullifying variable i here.
delete array ##Deleting array here.
}' Input_file ##Mentioning Input_file here.