我想在单个脚本中运行多条 awk 命令。例如,主文件 Master.csv.gz 位于 /cygdrive/e/Test/Master.csv.gz,而输入文件位于不同的子目录中,如 /cygdrive/f/Jan/Input_Jan.csv.gz 和 /cygdrive/f/Feb/Input_Feb.csv.gz 等。所有输入文件都是 *.gz 压缩文件。
以下命令在逐个执行命令时正常工作:
命令#1
awk ' BEGIN {FS = OFS = ","} FNR==NR {a[$2] = $0; next} ($2 in a) {print $0}' <(gzip -dc /cygdrive/e/Test/Master.csv.gz) <(gzip -dc /cygdrive/f/Jan/Input_Jan.csv.gz) >>Output.txt
输出#1:
Name,Age,Location
abc,20,xxx
命令#2
awk ' BEGIN {FS = OFS = ","} FNR==NR {a[$2] = $0; next} ($2 in a) {print $0}' <(gzip -dc /cygdrive/e/Test/Master.csv.gz) <(gzip -dc /cygdrive/f/Feb/Input_Feb.csv.gz) >>Output.txt
输出#2:
Name,Age,Location
def,40,yyy
cat Output.txt
Name,Age,Location
abc,20,xxx
def,40,yyy
我尝试把这些命令放进单个脚本中运行,但出现了错误:
尝试#1:awk -f Test.awk
cat Test.awk
awk ' BEGIN {FS = OFS = ","} FNR==NR {a[$2] = $0; next} ($2 in a) {print $0}' <(gzip -dc /cygdrive/e/Test/Master.csv.gz) <(gzip -dc /cygdrive/f/Jan/Input_Jan.csv.gz) >>Output.txt
awk ' BEGIN {FS = OFS = ","} FNR==NR {a[$2] = $0; next} ($2 in a) {print $0}' <(gzip -dc /cygdrive/e/Test/Master.csv.gz) <(gzip -dc /cygdrive/f/Feb/Input_Feb.csv.gz) >>Output.txt
错误:尝试#1:awk -f Test.awk
awk: Test.awk:1: ^ invalid char ''' in expression
awk: Test.awk:1: ^ syntax error
尝试#2:sh Test.sh
cat Test.sh
#!/bin/sh
awk ' BEGIN {FS = OFS = ","} FNR==NR {a[$2] = $0; next} ($2 in a) {print $0}' <(gzip -dc /cygdrive/e/Test/Master.csv.gz) <(gzip -dc /cygdrive/f/Jan/Input_Jan.csv.gz) >>Output.txt
awk ' BEGIN {FS = OFS = ","} FNR==NR {a[$2] = $0; next} ($2 in a) {print $0}' <(gzip -dc /cygdrive/e/Test/Master.csv.gz) <(gzip -dc /cygdrive/f/Feb/Input_Feb.csv.gz) >>Output.txt
错误:尝试#2:sh Test.sh
Test.sh: line 2: syntax error near unexpected token `('
期望的输出:
Name,Age,Location
abc,20,xxx
def,40,yyy
寻找你的建议..
更新 #2(月份名称):Ed Morton,感谢您的回答,但输出格式不正确——“Jan2014”被打印到了下一行,请给些建议。
cat Output.txt:
Name,Age,Location
abc,20,xxx
Jan2014
def,40,yyy
Feb2014
预期产出
Name,Age,Location
abc,20,xxx,Jan2014
def,40,yyy,Feb2014
答案 0 :(得分:3)
您只需要:
#!/bin/bash
# Collect the 2nd comma-separated field of every Master record, then append
# to Output.txt each record of the monthly inputs whose 2nd field was seen.
# All streams are decompressed on the fly through process substitution,
# which is a bash feature (hence the bash shebang rather than /bin/sh).
master=/cygdrive/e/Test/Master.csv.gz
jan=/cygdrive/f/Jan/Input_Jan.csv.gz
feb=/cygdrive/f/Feb/Input_Feb.csv.gz

awk '
  BEGIN       { FS = "," }
  NR == FNR   { seen[$2]; next }   # first stream only: remember the key
  $2 in seen  { print }            # later streams: keep matching records
' <(gzip -dc "$master") <(gzip -dc "$jan") <(gzip -dc "$feb") >> Output.txt
如果您想打印月份名称,那么最简单的事情就是:
#!/bin/bash
# Append to Output.txt every input record whose 2nd field also appears as a
# 2nd field in Master.csv.gz, with the month name added as a trailing field.
#
# Each `mth=...` operand is an awk command-line assignment: awk applies it
# when it reaches that operand, i.e. just before reading the file after it.
# OFS is set to "," together with FS; with only -F, the `print $0, mth`
# would join the month to the record with a space instead of a comma.
awk 'BEGIN{FS=OFS=","} FNR==NR{a[$2]; next} $2 in a{print $0, mth}' \
<(gzip -dc /cygdrive/e/Test/Master.csv.gz) \
mth="Jan" <(gzip -dc /cygdrive/f/Jan/Input_Jan.csv.gz) \
mth="Feb" <(gzip -dc /cygdrive/f/Feb/Input_Feb.csv.gz) \
>> Output.txt
但您可以用下面的写法,避免在每一行中把月份名称重复书写 3 次:
#!/bin/bash
# For each month, append to Output.txt every record of that month's input
# whose 2nd field also appears as a 2nd field in Master.csv.gz, with the
# month name added as a trailing comma-separated field.
#
# The month is written once (in mths) and reused for both the awk variable
# and the input path. A loop is used instead of `${mths[$((i++))]}` operands:
# bash expands words left to right, so a post-increment in the `mth=` word
# has already advanced i by the time the following <(...) is expanded, which
# pairs each month name with the next month's file.
mths=(Jan Feb)
for mth in "${mths[@]}"; do
  # Master is decompressed again for every month. FS and OFS are both ","
  # so that `print $0, mth` joins the month with a comma.
  awk -v mth="$mth" 'BEGIN{FS=OFS=","} FNR==NR{a[$2]; next} $2 in a{print $0, mth}' \
    <(gzip -dc /cygdrive/e/Test/Master.csv.gz) \
    <(gzip -dc "/cygdrive/f/${mth}/Input_${mth}.csv.gz")
done >> Output.txt
答案 1 :(得分:0)
您的第一次尝试失败,是因为您试图在 awk 脚本内部调用 awk 命令;第二次尝试失败,是因为 bash 的进程替换 <(...) 并非 POSIX 所定义,无法保证在 /bin/sh 下可用。下面是一个应该可以工作的 awk 脚本。
#!/usr/bin/awk -f
# Print every record of the input files whose 2nd comma-separated field also
# occurs as the 2nd field of some record in the master file.
#
# Usage: script.awk MASTER INPUT [INPUT...]
#   Files whose name ends in ".gz" are read through "gzip -dc", all others
#   through "cat". Exits with status 1 when fewer than two files are given.

# Return the shell command that streams the contents of `path`.
# The suffix test is done on the raw path: testing the quoted form can never
# match /\.gz$/ because the quoted string ends in a quote character.
# The path is wrapped in single quotes (embedded ones become '\'') so the
# shell does not expand $, backticks or spaces inside file names.
function reader(path,    part, n, i, quoted) {
    n = split(path, part, "'")
    quoted = part[1]
    for (i = 2; i <= n; i++)
        quoted = quoted "'\\''" part[i]
    return ((path ~ /\.gz$/) ? "gzip -dc " : "cat ") "'" quoted "'"
}

BEGIN {
    if (ARGC < 3) {
        print "usage: script.awk MASTER INPUT [INPUT...]" > "/dev/stderr"
        exit 1
    }

    # Pass 1: remember the 2nd field of every master record.
    cmd = reader(ARGV[1])
    while ((cmd | getline rec) > 0) {
        split(rec, fld, ",")
        seen[fld[2]]
    }
    close(cmd)

    # Pass 2: echo the input records whose 2nd field was seen in the master.
    for (n = 2; n < ARGC; n++) {
        cmd = reader(ARGV[n])
        while ((cmd | getline rec) > 0) {
            split(rec, fld, ",")
            if (fld[2] in seen) print rec
        }
        close(cmd)
    }

    # Everything is done in BEGIN; exit so awk does not go on to read the
    # operands itself as ordinary input files.
    exit
}
使用
# One run per input file: first operand is the master, second the input.
script.awk /cygdrive/e/Test/Master.csv.gz /cygdrive/f/Jan/Input_Jan.csv.gz
script.awk /cygdrive/e/Test/Master.csv.gz /cygdrive/f/Feb/Input_Feb.csv.gz
或
# A single run over several inputs. The space before the trailing backslash
# matters: a backslash-newline is simply removed, so without the space the
# two input paths would be joined into one argument.
script.awk /cygdrive/e/Test/Master.csv.gz /cygdrive/f/Jan/Input_Jan.csv.gz \
  /cygdrive/f/Feb/Input_Feb.csv.gz