需要帮助来了解AWK脚本逻辑

时间:2019-05-24 20:36:21

标签: shell awk

我需要帮助来理解我们的一位同事过去用AWK编写的以下代码逻辑。 具有AWK知识的人可以帮助我理解此代码。

谢谢, 桑迪普

# Strip carriage returns (DOS line endings) from both data files, in place.
sed -i 's/\r//g' $1 $2
# Replace every comma with a semicolon in both data files, in place
# (presumably so values cannot clash with the comma-separated output - TODO confirm).
sed -i 's/,/;/g' $1 $2
# One awk program handles five inputs, told apart by FILENAME ("|" is the field separator):
#   Parameter.txt : a[name]=line number, aa[line number]=name, d=number of parameters,
#                   e=comma-joined list of all names (later printed as the header).
#   Traffic.csv   : h[row,$2]=$3, x[row,$4]=$5, k[row,$2,$3,$4,$5]=$1, y=number of rows.
#   Filter.txt    : fields are read in groups of three ($i, operator, value); groups whose
#                   operator is "ne" go to FNE[row,$i], all others to FE[row,$i];
#                   FILTER=number of rows, FC[row]=number of groups on that row.
#   $2, then $1   : data files whose fields have the form name=value.
awk -F"|" '{if(FILENAME=="Parameter.txt"){a[$1]=NR;aa[NR]=$1;d=NR;if(NR==1){e=$1}else{e=e","$1};}
                        else if(FILENAME=="Traffic.csv"){h[FNR","$2]=$3;x[FNR","$4]=$5;k[FNR","$2","$3","$4","$5]=$1;y=FNR;}
                        else if(FILENAME=="Filter.txt"){for(i=1;i<=NF;i=i+3){if($(i+1)!="ne"){FE[FNR","$i]=$(i+2)}else{FNE[FNR","$i]=$(i+2)}};FILTER=FNR;FC[FNR]=(NF/3);}
                        else{
                                # Data record. Print the header once, before the first data record.
                                if(g==""){print e",TRAFFIC_CASE";g=1};
                                # Reset per-record state: c[] = value per parameter column, l/z = matched
                                # traffic pairs, m = traffic case, FI[] = per-filter score, FINAL = drop flag.
                                for(i=1;i<=d;i++){c[i]=""};l="";m="";z="";FINAL="";for(j=1;j<=FILTER;j++){FI[j]=""};
                                # Split every field into name (b[1]) and value (b[2]); store the value in the
                                # column of a known parameter, then scan all Traffic.csv rows: l and z remember
                                # the "column,value" pair that matched h and x, and m takes k[row,l,z] if set.
                                for(i=1;i<=NF;i++){
                                                                        split($i,b,"=");
                                                                        if(a[b[1]]!=""){c[a[b[1]]]=b[2];};
                                                                        for(j=1;j<=y;j++){
                                                                                if(h[j","a[b[1]]]==b[2] && h[j","a[b[1]]]!=""){l=a[b[1]]","b[2]};
                                                                                if(x[j","a[b[1]]]==b[2] && x[j","a[b[1]]]!=""){z=a[b[1]]","b[2]};
                                                                                if(k[j","l","z]!=""){m=k[j","l","z]};};
                                                                        };
                                # Records from a file whose name has "SVC" at characters 20-22 only set a flag in Q;
                                # other records build the output row f and score the filters unless flagged.
                                # NOTE(review): MSISDN is an unquoted, never-assigned awk variable, so a[MSISDN]
                                # is a[""] and every record uses the same Q key - probably "MSISDN" was meant.
                                if(substr(FILENAME,20,3)=="SVC"){Q[c[a[MSISDN]]]=1}
                                else if(Q[c[a[MSISDN]]]!=1){
                                        for(i=1;i<=d;i++){
                                                                                if(i==1){f=c[1];}else{f=f","c[i]};
                                                                                if(c[i]==""){c[i]="B"};
                                                                                for(j=1;j<=FILTER;j++){
                                                                                        if(FE[j","i]!=""){if(FE[j","i]==c[i] && (FI[j]=="" || FI[j]<=FC[j])){FI[j]=FI[j]+1;}else{FI[j]=FI[j]-1;}};
                                                                                        if(FNE[j","i]!=""){if(FNE[j","i]!=c[i] && (FI[j]=="" || FI[j]<=FC[j])){FI[j]=FI[j]+1;}else{FI[j]=FI[j]-1;}};
                                                                                        };
                                                                                };
                                                                        }
                                # A record is dropped when the score of any filter row equals its number of
                                # conditions; otherwise the row f plus the traffic case m is printed.
                                # NOTE(review): f is not reset per record, so a record that skipped the block
                                # above prints the row built for an earlier record - confirm this is intended.
                                for(j=1;j<=FILTER;j++){if(FI[j]==FC[j]){FINAL=1};};
                                if(FINAL!=1){print f","m;};
                                };
                        }' Parameter.txt Traffic.csv Filter.txt $2 $1

2 个答案:

答案 0 :(得分:0)

以下是用 gawk -o(pretty-print 选项)重新排版后的代码,这样更易于阅读,方便您自行分析:

# Single rule run for every input line; FILENAME decides how the line is used.
# Three lookup files are loaded first, every other file is treated as data.
{
    if (FILENAME == "Parameter.txt") {
        # a: parameter name -> line number, aa: line number -> name,
        # d: number of parameters, e: comma-joined names (the output header).
        a[$1] = NR
        aa[NR] = $1
        d = NR
        if (NR == 1) {
            e = $1
        } else {
            e = e "," $1
        }
    } else if (FILENAME == "Traffic.csv") {
        # Per row: h maps (row,$2) to $3, x maps (row,$4) to $5, and k maps the
        # whole combination to $1. y ends up as the number of rows.
        h[FNR "," $2] = $3
        x[FNR "," $4] = $5
        k[FNR "," $2 "," $3 "," $4 "," $5] = $1
        y = FNR
    } else if (FILENAME == "Filter.txt") {
        # Fields are consumed in groups of three: $i, an operator, a value.
        # Operator "ne" stores the value in FNE, anything else stores it in FE.
        for (i = 1; i <= NF; i = i + 3) {
            if ($(i + 1) != "ne") {
                FE[FNR "," $i] = $(i + 2)
            } else {
                FNE[FNR "," $i] = $(i + 2)
            }
        }
        # FILTER: number of filter rows; FC: number of groups on this row.
        FILTER = FNR
        FC[FNR] = (NF / 3)
    } else {
        # Data record. The header is printed once, before the first one.
        if (g == "") {
            print e ",TRAFFIC_CASE"
            g = 1
        }
        # Reset per-record state: column values, matched traffic pairs (l, z),
        # traffic case (m), drop flag (FINAL) and per-filter scores (FI).
        for (i = 1; i <= d; i++) {
            c[i] = ""
        }
        l = ""
        m = ""
        z = ""
        FINAL = ""
        for (j = 1; j <= FILTER; j++) {
            FI[j] = ""
        }
        # Each field is split into name (b[1]) and value (b[2]).
        for (i = 1; i <= NF; i++) {
            split($i, b, "=")
            # Known parameter: store the value in that parameter's column.
            if (a[b[1]] != "") {
                c[a[b[1]]] = b[2]
            }
            # Compare against every Traffic.csv row; l and z keep the
            # "column,value" pair that matched, m takes k[row,l,z] when set.
            for (j = 1; j <= y; j++) {
                if (h[j "," a[b[1]]] == b[2] && h[j "," a[b[1]]] != "") {
                    l = a[b[1]] "," b[2]
                }
                if (x[j "," a[b[1]]] == b[2] && x[j "," a[b[1]]] != "") {
                    z = a[b[1]] "," b[2]
                }
                if (k[j "," l "," z] != "") {
                    m = k[j "," l "," z]
                }
            }
        }
        # NOTE(review): MSISDN is an unquoted variable that is never assigned,
        # so a[MSISDN] is a[""] and all records share one Q key; the string
        # "MSISDN" was probably intended - confirm with the data.
        if (substr(FILENAME, 20, 3) == "SVC") {
            # Files with "SVC" at characters 20-22 of their name only set a flag.
            Q[c[a[MSISDN]]] = 1
        } else if (Q[c[a[MSISDN]]] != 1) {
            for (i = 1; i <= d; i++) {
                # f is the output row; it receives the value before the "B"
                # default below is applied, so empty columns stay empty in f.
                if (i == 1) {
                    f = c[1]
                } else {
                    f = f "," c[i]
                }
                if (c[i] == "") {
                    c[i] = "B"
                }
                # Score each filter row: +1 when the condition on column i
                # holds and the score is still unset or <= FC[j], else -1.
                for (j = 1; j <= FILTER; j++) {
                    if (FE[j "," i] != "") {
                        if (FE[j "," i] == c[i] && (FI[j] == "" || FI[j] <= FC[j])) {
                            FI[j] = FI[j] + 1
                        } else {
                            FI[j] = FI[j] - 1
                        }
                    }
                    if (FNE[j "," i] != "") {
                        if (FNE[j "," i] != c[i] && (FI[j] == "" || FI[j] <= FC[j])) {
                            FI[j] = FI[j] + 1
                        } else {
                            FI[j] = FI[j] - 1
                        }
                    }
                }
            }
        }
        # The record is dropped when any filter row scored exactly FC[j].
        for (j = 1; j <= FILTER; j++) {
            if (FI[j] == FC[j]) {
                FINAL = 1
            }
        }
        # NOTE(review): f is not reset per record, so a record that skipped the
        # block above prints the row of an earlier record - confirm intent.
        if (FINAL != 1) {
            print f "," m
        }
    }
}

现在代码已经正确排版;要理解这个脚本,第 2 步是把所有变量重命名为能体现其真实含义的名称,例如 a[$1]=NR -> param2lineNr[$1]=NR,aa[NR]=$1 -> lineNr2param[NR]=$1,或类似的名称,具体取决于 Parameter.txt 文件中 $1 的含义。您还应该引入一些命名良好的中间变量,这样下次阅读代码时,就不必再琢磨 Q[c[a[MSISDN]]] 这样的结构到底是什么意思(顺便说一句,在我看来这很像一个错误:MSISDN 是一个未初始化的变量,作者的本意可能是字符串 "MSISDN",但我不能确定)。对所有变量都这样处理之后,整个脚本的含义就应该清楚了。玩得开心!

答案 1 :(得分:-1)

我们在文件中有参数,交通情况和过滤器...这是一个足够大的脚本,它也希望包含在独立文件中。

我的过程:

  1. 我重新格式化并将结果放在vim中
  2. 我通过在第一组括号外进行模式匹配来消除了一个巢。
  3. 我看到数组下标是用 "," 拼接起来的……这种情况可以使用 SUBSEP(进一步重构之后,也许可以去掉 SUBSEP 的定义……不过这无关紧要)。
  4. 在那些地方看到了if (foo[bar] == "")的支票,我将其替换为更直观的if (bar in foo)
  5. 我开始用s/\<x\>/something_x/g替换-vim确实很不错,因为您可以使用\< \> ...
  6. 隔离变量名。
  7. 我用split("", arr)替换了空白数组
  8. 我看到b [1]是一个键,b [2]是一个值...最好创建中间变量,以使流量匹配部分更具可读性。
  9. 浏览每个变量开始显示未使用的变量,例如aa
  10. 我将foo = foo + 1更改为foo++
  11. 我在似乎合适的if语句中颠倒了逻辑条件,并摆脱了多余的{}
  12. 我对过滤位进行了一些重构,以减少数组查找的次数。

这是我的猜测:

#! /usr/bin/awk -f
#
# Turn "name=value" records (fields separated by "|") into CSV rows, append
# the matching traffic case, and drop rows that satisfy a whole filter line.
#
# Expected argument order: Parameter.txt Traffic.csv Filter.txt data-file...
#   Parameter.txt  one parameter name per line; the line number is the
#                  output column of that parameter.
#   Traffic.csv    case|col1|val1|col2|val2 - a record whose column col1 holds
#                  val1 and whose column col2 holds val2 gets that case.
#   Filter.txt     groups of three fields: column|operator|value, where the
#                  operator "ne" means "not equal" and anything else "equal".
#
# Array subscripts are joined with "," (SUBSEP) exactly as the original
# one-liner did, so the lookup keys are identical to the original ones.

BEGIN {
    FS = "|"
    SUBSEP = ","
    header_done = 0
}

# Update the running score of filter line f: +1 when the condition held and
# the score is still unset or not above the number of conditions, else -1.
function score_filter(f, ok) {
    if (ok && (!(f in filtered) || filtered[f] <= filters[f]))
        ++filtered[f]
    else
        --filtered[f]
}

FILENAME == "Parameter.txt" {
    param[$1] = NR
    param_sz = NR
    if (FNR == 1)
        param_list = $1
    else
        param_list = param_list "," $1
    next
}

FILENAME == "Traffic.csv" {
    traffic_h[FNR, $2] = $3
    traffic_x[FNR, $4] = $5
    traffic_key[FNR, $2, $3, $4, $5] = $1
    traffic_sz = FNR
    next
}

FILENAME == "Filter.txt" {
    for (i = 1; i <= NF; i += 3) {
        if ($(i + 1) == "ne")
            filter_ne[FNR, $i] = $(i + 2)
        else
            filter_eq[FNR, $i] = $(i + 2)
    }
    filter_sz = FNR
    filters[FNR] = (NF / 3)
    next
}

# First data record: emit the CSV header once.
!header_done {
    print param_list ",TRAFFIC_CASE"
    header_done = 1
}

{
    # "case" is a reserved word in gawk, so the per-record values live in rec[].
    split("", rec)
    split("", filtered)
    l = ""
    z = ""
    traffic_match = ""
    final_filter = 0

    for (i = 1; i <= NF; i++) {
        split($i, b, "=")
        key = b[1]
        val = b[2]
        # Guard with "in" so unknown names do not create entries in param[].
        idx = (key in param) ? param[key] : ""
        if (idx != "")
            rec[idx] = val
        for (t = 1; t <= traffic_sz; t++) {
            # l and z carry "column,value" so that (t, l, z) rebuilds the
            # (row, $2, $3, $4, $5) subscript stored in traffic_key.
            if (((t, idx) in traffic_h) && traffic_h[t, idx] != "" && traffic_h[t, idx] == val)
                l = idx SUBSEP val
            if (((t, idx) in traffic_x) && traffic_x[t, idx] != "" && traffic_x[t, idx] == val)
                z = idx SUBSEP val
            if (((t, l, z) in traffic_key) && traffic_key[t, l, z] != "")
                traffic_match = traffic_key[t, l, z]
        }
    }

    # The original used the unquoted (empty) variable MSISDN here; the string
    # "MSISDN" is assumed to be what was meant - verify against real data.
    msisdn = ""
    if ("MSISDN" in param)
        msisdn = rec[param["MSISDN"]]

    # Records from a file with "SVC" at characters 20-22 of its name only
    # register their MSISDN; records with a registered MSISDN are not rebuilt.
    if (substr(FILENAME, 20, 3) == "SVC")
        svc_seen[msisdn] = 1

    if (!(msisdn in svc_seen)) {
        for (p = 1; p <= param_sz; p++) {
            # The row gets the raw value; the "B" default is only used for
            # filter matching, as in the original.
            if (p == 1)
                case_list = rec[1]
            else
                case_list = case_list "," rec[p]
            if (rec[p] == "")
                rec[p] = "B"
            for (f = 1; f <= filter_sz; f++) {
                # Both checks are independent, as in the original script.
                if (((f, p) in filter_eq) && filter_eq[f, p] != "")
                    score_filter(f, filter_eq[f, p] == rec[p])
                if (((f, p) in filter_ne) && filter_ne[f, p] != "")
                    score_filter(f, filter_ne[f, p] != rec[p])
            }
        }
    }

    # Drop the record when any filter line had all of its conditions met.
    for (f = 1; f <= filter_sz; f++)
        if ((f in filtered) && filtered[f] == filters[f])
            final_filter = 1

    # NOTE(review): as in the original, case_list is not reset per record, so
    # SVC records and records skipped above print the previously built row -
    # confirm whether those records should print nothing instead.
    if (!final_filter)
        print case_list "," traffic_match
}

所以,很可能我出了点问题,因为我没有用于测试用例的I / O,但是您可以重复我的步骤来弄清现实。我将进一步处理该过滤器...我将检查“ MSISDN”(如Ed所建议的那样),看看这对您来说是否有趣...

另一个值得怀疑的地方是:您似乎在用 "|" 作为分隔符来解析 .csv 文件……这可能行不通。