假设我有一个包含以下格式标题的CSV文件:
Field1,Field2
3,262000
4,449000
5,650000
6,853000
7,1061000
8,1263000
9,1473000
10,1683000
11,1893000
我想编写一个awk脚本,它接收一个以逗号分隔的字段名称列表 target,
将其拆分为一个数组,然后只选择名称与我指定的名称相符的列。
这是我到目前为止所尝试的脚本。我已经验证 head
数组包含所需的标头,targets
数组包含通过命令行传入的所需目标。
# Intended use: awk -v target=Name1,Name2 -f GetCol.awk file.csv
# Reads comma-separated input, remembers the header row, and for every
# later row prints each field whose header passes the `in targets` test.
BEGIN {
    FS = ","
    # split() stores the requested names as VALUES under subscripts 1..n.
    split(target, targets, ",")
}

# Header row: remember the column name found at each position.
NR == 1 {
    for (col = 1; col <= NF; col++)
        head[col] = $col
    next
}

# Data rows: one matching field per output line.
{
    for (col = 1; col <= NF; col++)
        # NOTE: `in` checks whether head[col] is a SUBSCRIPT of targets
        # (i.e. "1", "2", ...), not whether it equals a stored name.
        if (head[col] in targets)
            print $col
}
当我使用以下命令调用此脚本时:
awk -v target=Field1 -f GetCol.awk Debug.csv
没有任何输出。
答案 0 :(得分:10)
我想出来并发布了答案以防其他人遇到同样的问题。
问题出在我用来测试数组成员资格的 in
关键字上。
该关键字只测试左侧的操作数是否是右侧数组的索引(下标)之一,而不是数组中存储的值之一。
修复是创建反向查找数组,如下所示。
# Build a reverse lookup keyed by column name, so that a later
# `name in targets` test matches the requested names themselves.
BEGIN {
    FS = ","
    OFS = FS
    # requested[1..n] holds the names in the order given on the command line.
    split(target, requested, ",")
    # Invert it: targets[name] = position of that name in the request list.
    for (pos in requested) {
        targets[requested[pos]] = pos
    }
}
答案 1 :(得分:6)
我的两分钱:
# Print only the CSV columns whose header name is listed in the
# comma-separated variable `target` (pass it with -v target=A,B).
# Columns come out in file order, joined with OFS; the header row itself
# is not printed.
BEGIN {
    FS = ","
    OFS = FS
    # Reuse FS as the list delimiter rather than repeating the literal comma.
    split(target, wanted, FS)
    # Reverse lookup: field_idx[name] exists for every requested name.
    for (k in wanted)
        field_idx[wanted[k]] = k
}

# Header row: record each column's name by position, then move on.
NR == 1 {
    for (col = 1; col <= NF; col++)
        head[col] = $col
    next
}

# Data rows (the header rule above already consumed row 1 via `next`).
{
    line = ""
    sep = ""                      # no separator before the first match
    for (col = 1; col <= NF; col++) {
        if (head[col] in field_idx) {
            line = line sep $col  # append this requested column
            sep = OFS             # every later match is preceded by OFS
        }
    }
    # A newline is written for every row, even when nothing matched.
    printf "%s\n", line
}
答案 2 :(得分:1)
对 @sudo_O 解决方案的扩展(谢谢)
#!/usr/bin/awk -f
# Process standard input, outputting the named columns given as arguments.
#
# The first input record is treated as the header row and is not printed.
# Columns are emitted in the order requested; a name may be requested more
# than once.
#
# For example, given foo.dat containing
#   a  b  c  c
#   1a 1b 1c 1C
#   2a 2b 2c 2C
#   3a 3b 3c 3C
# Running
#   cat foo.dat | ./namedcols c b a a d
# will output
#   1c 1b 1a 1a d
#   2c 2b 2a 2a d
#   3c 3b 3a 3a d
# and will warn on standard error that it
#   Ignored duplicate 'c' in column 4
# Notice that the requested but missing column d contains "d".
#
# Using awk's -F feature it is possible to parse comma-separated data:
#   cat foo.csv | ./namedcols -F, c b a a d

BEGIN {
    # Every command-line operand is a requested column name. Copy each one,
    # then remove it from ARGV so awk does not try to open it as an input
    # file and reads standard input instead. Elements are deleted one at a
    # time because whole-array `delete ARGV` is not available in every awk.
    ndesired = ARGC - 1
    for (i = 1; i <= ndesired; ++i) {
        desired[i] = ARGV[i]
        delete ARGV[i]
    }
}

# Header row: map each column name to the first position where it appears.
NR == 1 {
    for (i = 1; i <= NF; i++) {
        if ($i in names)
            # Piping to `cat 1>&2` is a portable way to write to stderr.
            printf "Ignored duplicate '%s' in column %d\n", $i, i | "cat 1>&2"
        else
            names[$i] = i
    }
    next
}

# Data rows: emit the requested columns, joined with OFS.
{
    for (i = 1; i <= ndesired; ++i) {
        name = desired[i]
        # Testing with `in` avoids creating empty names[] entries for
        # columns that are not in the header; such columns print their
        # own name as a placeholder.
        value = (name in names) ? $(names[name]) : name
        printf "%s%s", (i == 1 ? "" : OFS), value
    }
    # End the record with ORS through a fixed format string. The original
    # `printf RS` used the input separator as the format, so a "%" in RS
    # corrupted the output and a custom ORS was ignored.
    printf "%s", ORS
}