如何优化这个awk脚本?

时间:2019-06-12 16:04:57

标签: awk

我用 awk 处理两个文件。先读取第一个文件,把需要的列存入数组;然后用这些数组与第二个文件的第 8 列进行比较。我的脚本运行非常缓慢,想知道有没有办法优化?

# Two-file awk script as posted by the asker (kept verbatim; it does not parse as-is).
# Intent, as far as the code shows: while reading the first file, remember column 1
# (stored in a[] and ip[]) and column 2 (site[]); for every record of the second
# file, look up field 8 and print a labelled, OFS-joined summary line.
#
# NOTE(review): in awk the opening brace of an action must be on the same line as
# its pattern. With the newline after FNR==NR, the pattern gets the default action
# (print) and the brace block below runs for every record.
FNR==NR
{
    # a[] is used only for the membership test ($8 in a) further down.
    a[$1];
    # ip[] and site[] are parallel arrays indexed by record number (NR starts at 1).
    ip[NR]=$1;
    site[NR]=$2;
    next
}


# Fields are split on a tab, a comma or an equals sign; print joins its
# comma-separated arguments with "|".
BEGIN{

FS="[\t,=]";
OFS="|";

}

# NOTE(review): "sudo awk -f" is not awk syntax; it looks like a stray fragment of
# the shell command line. Presumably this was meant to be a bare "{" main action.
sudo awk -f{
# NOTE(review): length(ip) is re-evaluated for every record although ip[] is no
# longer modified by an assignment at this point.
l=length(ip);

if($8 in a)
{
    # NOTE(review): linear scan over every stored ip for each matching record; this
    # is the likely cause of the slowness. The scan starts at k=0 although entries
    # are stored from index 1.
    for(k=0;k<=l;k++)
   {
     if(ip[k]== $8)
     {

      # Records with more than 70 fields take the three url* values from fields
      # 37/39/41 instead of 36/38/40.
      if(NF <= 70)
         {
           print  "siteID Ipam: "site[k],"siteID zsc: "$14,"date: " $4,"src: "$8,"dst: "$10,"role: "$22,"urlcategory: "$36, "urlsupercategory: "$38,"urlclass: "$40;
          }
      else
         {
           # NOTE(review): the last string literal on the next line has no closing
           # quote before $41, which is a syntax error.
           print "siteID Ipam: "site[k], "siteID zsc: "$14,"date: " $4, "src: " $8, "dst: " $10, "role: "$22, "urlcategory: " $37, "urlsupercategory: "$39, "urlclass: $41;
         }
      # Only the first matching entry is reported.
      break;
     }
   }
}
else
{
print $8 " is not in referentiel ";
}
}

1 个答案:

答案 0 :(得分:0)

下面是重新整理格式后的脚本。

# Join two files on an IP address.
#
# Usage: awk -f THIS_SCRIPT FIRST_FILE SECOND_FILE
#   FIRST_FILE  - reference list: field 1 is the IP, field 2 is the site ID.
#   SECOND_FILE - records whose field 8 is looked up in the reference list.
#
# For each record of SECOND_FILE, prints one "|"-separated summary line when
# field 8 is a known IP, otherwise a "not in referentiel" message.
#
# Caveat: FNR == NR identifies the first file only while it is non-empty; with an
# empty FIRST_FILE the records of SECOND_FILE would be treated as reference data.

BEGIN {
    # Split fields on a tab, a comma or an equals sign; join print arguments with "|".
    FS = "[\t,=]"
    OFS = "|"
}

# First file: build a direct ip -> site map. Keeping only the first occurrence of
# an IP gives the same result as a front-to-back scan that stops at the first hit.
FNR == NR {
    if (!($1 in site))
        site[$1] = $2
    next
}

# Second file: one associative-array lookup per record instead of a scan over
# every stored IP.
{
    # "in" tests membership without creating an empty element for unknown IPs.
    if (!($8 in site)) {
        print $8 " is not in referentiel "
        next
    }

    # Records with more than 70 fields carry the three url* values one field
    # further to the right (37/39/41 instead of 36/38/40).
    shift = (NF <= 70) ? 0 : 1

    print "siteID Ipam: " site[$8], "siteID zsc: " $14, "date: " $4, "src: " $8, "dst: " $10, "role: " $22, "urlcategory: " $(36 + shift), "urlsupercategory: " $(38 + shift), "urlclass: " $(40 + shift)
}

建议:

  1. 修复 sudo awk -f 这处笔误:它不是 awk 语法,应将该行改为单独的 {。

  2. a[$1];-> a[$1] = 1;

  3. ($8 in a)-> (a[$8])