我需要帮助来理解我们的一位同事过去用AWK编写的以下代码逻辑。 具有AWK知识的人可以帮助我理解此代码。
谢谢, 桑迪普
# Remove every carriage return from both data files, editing them in place.
sed -i 's/\r//g' $1 $2
# Replace every comma with a semicolon in both data files, in place
# (presumably so that field values cannot clash with the comma-separated
# output produced below -- confirm with the data owner).
sed -i 's/,/;/g' $1 $2
# The awk program reads, in this order: Parameter.txt, Traffic.csv, Filter.txt,
# then the data files $2 and $1. Every file is split into fields on "|".
#   Parameter.txt: a[name] = line number, aa[line number] = name, d = number of
#                  lines read so far, e = the names joined with ","
#                  (used later as the output header).
#   Traffic.csv:   h[row,$2] = $3, x[row,$4] = $5, k[row,$2,$3,$4,$5] = $1,
#                  y = number of rows read so far.
#   Filter.txt:    each line is read as triples (id, operator, value); a triple
#                  whose operator is "ne" goes to FNE[row,id], any other
#                  operator goes to FE[row,id]. FILTER = number of rows,
#                  FC[row] = NF/3 (number of triples on that row).
#   Any other file is treated as data by the final else branch.
awk -F"|" '{if(FILENAME=="Parameter.txt"){a[$1]=NR;aa[NR]=$1;d=NR;if(NR==1){e=$1}else{e=e","$1};}
else if(FILENAME=="Traffic.csv"){h[FNR","$2]=$3;x[FNR","$4]=$5;k[FNR","$2","$3","$4","$5]=$1;y=FNR;}
else if(FILENAME=="Filter.txt"){for(i=1;i<=NF;i=i+3){if($(i+1)!="ne"){FE[FNR","$i]=$(i+2)}else{FNE[FNR","$i]=$(i+2)}};FILTER=FNR;FC[FNR]=(NF/3);}
else{
# Data record. Print the header line once, before the first data record.
if(g==""){print e",TRAFFIC_CASE";g=1};
# Reset the per-record state: c (values by parameter line number), l and z
# (last matched Traffic.csv column pairs), m (matched traffic case), FINAL
# and the per-filter counters FI.
for(i=1;i<=d;i++){c[i]=""};l="";m="";z="";FINAL="";for(j=1;j<=FILTER;j++){FI[j]=""};
# Each field is split on "=" into b[1] (name) and b[2] (value).
for(i=1;i<=NF;i++){
split($i,b,"=");
# Store the value under the line number that the name has in Parameter.txt.
if(a[b[1]]!=""){c[a[b[1]]]=b[2];};
# Scan every Traffic.csv row: l and z remember "line number,value" pairs that
# equal the row columns ($2,$3) and ($4,$5); m takes column $1 of a row whose
# key is exactly row,l,z.
for(j=1;j<=y;j++){
if(h[j","a[b[1]]]==b[2] && h[j","a[b[1]]]!=""){l=a[b[1]]","b[2]};
if(x[j","a[b[1]]]==b[2] && x[j","a[b[1]]]!=""){z=a[b[1]]","b[2]};
if(k[j","l","z]!=""){m=k[j","l","z]};};
};
# NOTE(review): MSISDN is a bare awk variable that this program never assigns,
# so a[MSISDN] is a[""] here; the string "MSISDN" was probably intended.
# Confirm before relying on the SVC handling.
# Records from a file whose name has "SVC" at characters 20-22 only set a flag
# in Q; all other records are evaluated unless their Q flag is already set.
if(substr(FILENAME,20,3)=="SVC"){Q[c[a[MSISDN]]]=1}
else if(Q[c[a[MSISDN]]]!=1){
for(i=1;i<=d;i++){
# f is the output row: the values in Parameter.txt order, joined with ",".
if(i==1){f=c[1];}else{f=f","c[i]};
# Only after the value was appended to f, an empty value is replaced by "B"
# for the filter comparisons below.
if(c[i]==""){c[i]="B"};
for(j=1;j<=FILTER;j++){
# FI[j] is raised by one for every satisfied term of filter j (while it is
# still unset or not above FC[j]) and lowered by one otherwise.
if(FE[j","i]!=""){if(FE[j","i]==c[i] && (FI[j]=="" || FI[j]<=FC[j])){FI[j]=FI[j]+1;}else{FI[j]=FI[j]-1;}};
if(FNE[j","i]!=""){if(FNE[j","i]!=c[i] && (FI[j]=="" || FI[j]<=FC[j])){FI[j]=FI[j]+1;}else{FI[j]=FI[j]-1;}};
};
};
}
# A filter whose counter equals its number of terms marks the record (FINAL=1);
# only unmarked records are printed, followed by the traffic case m.
# NOTE(review): f is not reset per record, so a record that skipped the branch
# above is printed with the f of an earlier record -- confirm this is intended.
for(j=1;j<=FILTER;j++){if(FI[j]==FC[j]){FINAL=1};};
if(FINAL!=1){print f","m;};
};
}' Parameter.txt Traffic.csv Filter.txt $2 $1
答案 0 :(得分:0)
以下是用 gawk -o 美化输出(pretty-print)后的代码,更便于阅读,这样您就可以自己弄清楚它的逻辑:
# One action, executed for every record of every input file; FILENAME selects
# which of the four branches handles the record.
{
    if (FILENAME == "Parameter.txt") {
        # a: name -> line number, aa: line number -> name,
        # d: number of lines so far, e: names joined with "," (output header).
        d = NR
        a[$1] = NR
        aa[NR] = $1
        e = (NR == 1) ? $1 : e "," $1
    } else if (FILENAME == "Traffic.csv") {
        # Index each row by its ($2,$3) pair, its ($4,$5) pair and by both
        # pairs together; k yields column $1 for the combined key.
        y = FNR
        h[FNR "," $2] = $3
        x[FNR "," $4] = $5
        k[FNR "," $2 "," $3 "," $4 "," $5] = $1
    } else if (FILENAME == "Filter.txt") {
        # A line is a sequence of (id, operator, value) triples. Operator "ne"
        # goes to FNE, every other operator goes to FE.
        for (i = 1; i <= NF; i += 3) {
            if ($(i + 1) == "ne") {
                FNE[FNR "," $i] = $(i + 2)
            } else {
                FE[FNR "," $i] = $(i + 2)
            }
        }
        FILTER = FNR
        FC[FNR] = (NF / 3)
    } else {
        # Data record. The header is printed once, before the first one.
        if (g == "") {
            print e ",TRAFFIC_CASE"
            g = 1
        }

        # Reset the per-record state.
        for (i = 1; i <= d; i++) {
            c[i] = ""
        }
        for (j = 1; j <= FILTER; j++) {
            FI[j] = ""
        }
        l = ""
        z = ""
        m = ""
        FINAL = ""

        # Each field is NAME=VALUE. Store VALUE under the line number NAME has
        # in Parameter.txt, then look the pair up in the Traffic.csv indexes.
        for (i = 1; i <= NF; i++) {
            split($i, b, "=")
            idx = a[b[1]]
            if (idx != "") {
                c[idx] = b[2]
            }
            for (j = 1; j <= y; j++) {
                if (h[j "," idx] == b[2] && h[j "," idx] != "") {
                    l = idx "," b[2]
                }
                if (x[j "," idx] == b[2] && x[j "," idx] != "") {
                    z = idx "," b[2]
                }
                if (k[j "," l "," z] != "") {
                    m = k[j "," l "," z]
                }
            }
        }

        # NOTE(review): MSISDN is a bare awk variable that this program never
        # assigns, so a[MSISDN] is a[""]; the string "MSISDN" was probably
        # intended -- confirm.
        if (substr(FILENAME, 20, 3) == "SVC") {
            # Records of a file with "SVC" at characters 20-22 only set a flag.
            Q[c[a[MSISDN]]] = 1
        } else if (Q[c[a[MSISDN]]] != 1) {
            for (i = 1; i <= d; i++) {
                # f is the output row, built before empty values become "B".
                f = (i == 1) ? c[1] : f "," c[i]
                if (c[i] == "") {
                    c[i] = "B"
                }
                # FI[j] goes up by one per satisfied term of filter j (while it
                # is unset or not above FC[j]) and down by one otherwise.
                for (j = 1; j <= FILTER; j++) {
                    if (FE[j "," i] != "") {
                        if (FE[j "," i] == c[i] && (FI[j] == "" || FI[j] <= FC[j])) {
                            FI[j] += 1
                        } else {
                            FI[j] -= 1
                        }
                    }
                    if (FNE[j "," i] != "") {
                        if (FNE[j "," i] != c[i] && (FI[j] == "" || FI[j] <= FC[j])) {
                            FI[j] += 1
                        } else {
                            FI[j] -= 1
                        }
                    }
                }
            }
        }

        # A filter whose counter equals its number of terms marks the record;
        # only unmarked records are printed.
        for (j = 1; j <= FILTER; j++) {
            if (FI[j] == FC[j]) {
                FINAL = 1
            }
        }
        if (FINAL != 1) {
            print f "," m
        }
    }
}
现在代码已经正确格式化了。在您理解脚本的过程中,第 2 步是把所有变量重命名为能体现其真实含义的名称,例如把 a[$1]=NR 改为 param2lineNr[$1]=NR,把 aa[NR]=$1 改为 lineNr2param[NR]=$1,或类似的名称,具体取决于 Parameter.txt 文件中 $1 的实际含义。您还应该引入一些命名良好的中间变量,这样下次阅读代码时就不必再费力弄清 Q[c[a[MSISDN]]] 这样的结构到底是什么意思(顺便说一句,在我看来这很像一个错误:MSISDN 是一个未初始化的变量,作者的本意可能是字符串 "MSISDN",不过我并不确定)。对所有变量都这样处理之后,整个脚本的含义就应该清楚了。玩得开心!
答案 1 :(得分:-1)
我们在文件中有参数,交通情况和过滤器...这是一个足够大的脚本,它也希望包含在独立文件中。
我的过程:
- 去掉了用 "," 手工拼接数组下标的写法……可以改用 SUBSEP(进一步重构之后,也许连 SUBSEP 的定义也可以去掉)。
- 把 if (foo[bar] == "") 这类检查替换为更直观的 if (bar in foo)。
- 用 s/\<x\>/something_x/g 这样的替换来重命名变量——vim 在这方面确实很好用,因为可以使用 \< 和 \> 来匹配单词边界。
- 用 split("", arr) 来清空数组,并去掉了数组 aa。
- 把 foo = foo + 1 改为 foo++。
- 在 if 语句中颠倒了逻辑条件,并去掉了多余的 {}。
这是我的猜测:
#! /usr/bin/awk -f
#
# Builds a comma-separated report from "|"-delimited records made of
# NAME=VALUE fields.
#
# Invocation (the order of the first three files matters):
#     script Parameter.txt Traffic.csv Filter.txt DATA_FILE...
#
#   Parameter.txt  one parameter name per line; the line number is the
#                  parameter index and fixes the output column order.
#   Traffic.csv    $1 = traffic case, ($2,$3) and ($4,$5) = two
#                  (parameter index, value) pairs that identify the case.
#   Filter.txt     each line is a list of (parameter index, operator, value)
#                  triples; operator "ne" means "not equal", anything else
#                  means "equal". A record satisfying every term of a line is
#                  dropped from the output.
#   DATA_FILE      records to report. Records of a file whose name has "SVC"
#                  at characters 20-22 only register their MSISDN; later
#                  records with a registered MSISDN skip filter evaluation.
#
# Output: a header line (parameter names plus TRAFFIC_CASE) followed by one
# line per data record that is not marked by a filter.

BEGIN {
    FS = "|"
    # Multi-dimensional subscripts are joined with "," so that a stored
    # "index,value" pair can be reused as part of a longer traffic_key key.
    SUBSEP = ","
    header_printed = 0
}

# Updates the counter of filter f for one evaluated term: +1 when the term is
# satisfied and the counter is still unset or not above the number of terms,
# -1 otherwise. Reads and writes the global arrays filtered and filters.
function tally(f, matched)
{
    if (matched && (!(f in filtered) || filtered[f] <= filters[f]))
        ++filtered[f]
    else
        --filtered[f]
}

FILENAME == "Parameter.txt" {
    # FNR instead of NR: the index is the line number inside this file.
    param[$1] = FNR
    param_sz = FNR
    if (FNR == 1)
        param_list = $1
    else
        param_list = param_list "," $1
    next
}

FILENAME == "Traffic.csv" {
    traffic_h[FNR, $2] = $3
    traffic_x[FNR, $4] = $5
    traffic_key[FNR, $2, $3, $4, $5] = $1
    traffic_sz = FNR
    next
}

FILENAME == "Filter.txt" {
    for (i = 1; i <= NF; i += 3) {
        if ($(i + 1) == "ne")
            filter_ne[FNR, $i] = $(i + 2)
        else
            filter_eq[FNR, $i] = $(i + 2)
    }
    filter_sz = FNR
    filters[FNR] = (NF / 3)
    next
}

# Everything below only sees data records. Print the header once.
!header_printed {
    print param_list ",TRAFFIC_CASE"
    header_printed = 1
}

{
    # "case" is a reserved word in gawk, so the per-record values live in rec.
    split("", rec)
    split("", filtered)
    h_match = ""
    x_match = ""
    traffic_match = ""
    final_filter = 0

    for (i = 1; i <= NF; i++) {
        split($i, b, "=")
        key = b[1]
        val = b[2]
        if (!(key in param))
            continue
        this_param = param[key]
        rec[this_param] = val

        for (t = 1; t <= traffic_sz; t++) {
            # Remember the whole "index,value" pair, not just the value:
            # traffic_key is keyed by row plus both complete pairs.
            if (((t, this_param) in traffic_h) && traffic_h[t, this_param] != "" && traffic_h[t, this_param] == val)
                h_match = this_param SUBSEP val
            if (((t, this_param) in traffic_x) && traffic_x[t, this_param] != "" && traffic_x[t, this_param] == val)
                x_match = this_param SUBSEP val
            if (((t, h_match, x_match) in traffic_key) && traffic_key[t, h_match, x_match] != "")
                traffic_match = traffic_key[t, h_match, x_match]
        }
    }

    # The original used the unassigned variable MSISDN here; the parameter
    # named "MSISDN" is assumed to be what was meant -- TODO confirm.
    # The "in" tests avoid creating param["MSISDN"] or rec[...] as a side effect.
    msisdn = ""
    if (("MSISDN" in param) && (param["MSISDN"] in rec))
        msisdn = rec[param["MSISDN"]]

    if (substr(FILENAME, 20, 3) == "SVC") {
        svc_case[msisdn] = 1
    } else if (!(msisdn in svc_case)) {
        for (p = 1; p <= param_sz; p++) {
            pval = (p in rec) ? rec[p] : ""
            if (p == 1)
                case_list = pval
            else
                case_list = case_list "," pval

            # Missing or empty values are compared as "B" by the filters, but
            # are printed as empty (case_list was extended above).
            if (pval == "")
                pval = "B"
            rec[p] = pval

            for (f = 1; f <= filter_sz; f++) {
                # Two independent checks: a filter line may hold both an
                # "equal" and a "not equal" term for the same parameter.
                if (((f, p) in filter_eq) && filter_eq[f, p] != "")
                    tally(f, filter_eq[f, p] == pval)
                if (((f, p) in filter_ne) && filter_ne[f, p] != "")
                    tally(f, filter_ne[f, p] != pval)
            }
        }
    }

    # A filter whose counter equals its number of terms marks the record.
    # The "in" test keeps an untouched filter from matching a zero-term line.
    for (f = 1; f <= filter_sz; f++)
        if ((f in filtered) && filtered[f] == filters[f])
            final_filter = 1

    # NOTE(review): as in the original one-liner, case_list is not reset per
    # record, so an SVC record or a record with a registered MSISDN prints the
    # row built for an earlier record -- confirm whether such records should
    # be printed at all.
    if (!final_filter)
        print case_list "," traffic_match
}
所以,很可能我出了点问题,因为我没有用于测试用例的I / O,但是您可以重复我的步骤来弄清现实。我将进一步处理该过滤器...我将检查“ MSISDN”(如Ed所建议的那样),看看这对您来说是否有趣...
另一件看起来可疑的事情是:您似乎在用“|”作为分隔符来解析 .csv 文件……这可能行不通。