我有一个使用正则表达式作为字段分隔符(FS)的 awk 命令,需要从每一行中提取 firstName 和 lastName 对应的值,但是下面这个命令不起作用:
awk -v OFS="\t" -v FS='firstName": "|",[^+]*lastName": "|", "' '{sum[$1]+=$2;} {print $1,$2}' sumacomando
"firstName": "gdrgo", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "lastName": "222",dfg
"xxxxx": "John", "firstName": "beto", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "lastName": "111","xxxxx": "John",
"xxxxx": "John", "firstName": "beto", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "lastName": "111","xxxxx": "John",
"xxxxx": "John", "xxxxx": "John", "firstName": "beto2", "xxxxx": "John","lastName": "555", "xxxxx": "John","xxxxx": "John",
"xxxxx": "John", "xxxxx": "John", "firstName": "beto2", "xxxxx": "John","lastName": "444", "xxxxx": "John","xxxxx": "John",
"firstName": "gdrgo", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "xxxxx": "John", "lastName": "222",dfg
"xxxxx": "John", "xxxxx": "John", "firstName": "beto2", "xxxxx": "John","lastName": "444", "xxxxx": "John","xxxxx": "John",
我需要打印
gdrgo,222
beto,111
beto,111
beto2,555
beto2,444
gdrgo,222
beto2,444
请帮帮我
答案 0 :(得分:2)
您的输入是 CSV,因此应该使用 GNU awk 的 FPAT 来处理它;您的输出也是 CSV,因此把 OFS 设置为制表符(tab)是没有意义的。一般来说,当文件中包含 名称->值 的映射时,最简单、最健壮的方法是先把这些映射存入一个数组(下面的 f[]),然后就可以按名称打印或以其他方式处理数据:
$ cat tst.awk
# tst.awk -- for every input record, print the values stored under the
# "firstName" and "lastName" keys, joined by a comma.
# Uses FPAT, which is a GNU awk feature.
BEGIN {
    # A field is either a run of non-comma characters or a double-quoted string.
    FPAT = "([^,]*)|(\"[^\"]+\")"
    OFS = ","
}

{
    # Start each record with an empty name -> value map so that nothing
    # leaks over from the previous record.
    delete value_of

    for (n = 1; n <= NF; ++n) {
        # Split the field on runs of whitespace, double quotes and colons.
        # For a field shaped like ` "name": "value"` the leading separator
        # leaves parts[1] empty, the name in parts[2] and the value in parts[3].
        split($n, parts, /[[:space:]":]+/)
        value_of[parts[2]] = parts[3]
    }

    print value_of["firstName"], value_of["lastName"]
}
$ awk -f tst.awk file
gdrgo,222
beto,111
beto,111
beto2,555
beto2,444
gdrgo,222
beto2,444
答案 1 :(得分:-1)
此命令有效
# Print "<firstName>,<lastName>" for every line of the file sumacomando.
#
# The field separator is the literal text `Name": "`, so the firstName value
# begins $2 and the lastName value begins $3. This assumes firstName comes
# before lastName on the line and that no other key ends in `Name` (true for
# the sample data shown above). Each value is cut just before its closing
# double quote. Lines that do not contain both keys (NF < 3) are skipped.
awk -v FS='Name": "' -v OFS=',' '
  NF >= 3 {
    first = substr($2, 1, index($2, "\"") - 1)
    last  = substr($3, 1, index($3, "\"") - 1)
    print first, last
  }
' sumacomando
gdrgo,222
beto,111
beto,111
beto2,555
beto2,444
gdrgo,222
beto2,444