awk:处理空标签

时间:2015-05-26 14:14:55

标签: xml csv awk

我有一个脚本可以将xml解析为csv。我不需要取值为空的标签,例如我有这个

<root>
  <record>
   <country>US</country>  
  <data>
            <id_client>50C</id_client>  
               <mail></mail>
            <adress>10 </adress>
            <num_tel>001</num_tel>
            <name>toto</name>
            <birth>01/30/008</birth>        
  </data> 
  <data>
            <id_client>100K</id_client>  
            <adress>10  </adress>
               <mail></mail>
              <num_tel></num_tel>
            <name>toto2</name>
            <birth>01/30/011</birth>                    
  </data> 
 </record>
 <record>
   <country>China</country>  
  <data>
            <id_client>99E</id_client>  
            <mail>3@mail.com</mail>
            <adress>10  </adress>
            <name>toto3</name>
            <birth>01/30/0008</birth>       
  </data> 
  <data>
            <id_client>77B</id_client>  
            <mail></mail>
            <adress>10  </adress>
            <num_tel>004</num_tel>
            <name>toto4</name>
            <birth>2001/05/01</birth>                   
  </data> 
  </record>
  </root>

使用awk脚本解析后的结果

 country;id_client;name; num_tel;mail
 US;50C;toto1;001
 US;100K;toto2
 China;99E;toto3;;3@mail.com
 China;77B;toto4;004

这是生成我的csv的脚本,但我不知道如何在某些标签中处理空值,如num_tel和mail

# Asker's attempt at converting the XML to CSV. It does not parse as written;
# see the NOTE(review) comments further down.
BEGIN {
    # Split each input line on '<' or '>'; join output columns with ';'.
    FS="[><]"; OFS=";"
    # Column names, in output order. NOTE(review): the separator is the regex
    # / / (one literal space), so the doubled space after "country" and the
    # trailing space each produce an empty element in tags[].
    n = split("country  id_client name num_tel mail birth ",tags,/ /)
    # Print the header row: OFS between names, ORS after the last one.
    for (i=1; i<=n; i++) {

        {printf "%s%s", tags[i], (i<n?OFS:ORS)}
    }
 }
# For every input line, store field 3 under the key found in field 2.
{ tag2val[$2] = $3 }
# This action has no pattern, so it is attempted for every input line rather
# than once per <data> element.
 {
    for (i=1; i<=n; i++) {

     # NOTE(review): the next two lines are not valid awk. The parentheses are
     # unbalanced, `n` is assigned as a scalar in BEGIN but subscripted here,
     # and `f` is never assigned anywhere in this script.
     if ($(f[n[4]]=="") {printf ""} 
     else if  ($(f[n[5]]=="") {printf ""} 
     else 
        printf "%s%s", tag2val[tags[i]], (i<n?OFS:ORS)
    }
}

1 个答案:

答案 0 :(得分:2)

这不会输出您所说想要的内容,而是输出我认为您真正想要的内容:

$ cat tst.awk
# tst.awk -- flatten <record>/<data> XML into semicolon-separated rows.
#
# The input must hold one element per line. Values seen outside a <data>
# element (e.g. <country>) are record-level and apply to every <data> element
# of that record. One row is printed per <data> element, using the columns
# listed in the split() call below. A tag that is empty or absent yields an
# empty column.
BEGIN {
    # "<tag>value</tag>" splits into 5 fields: indent, tag, value, /tag, rest.
    FS = "[<>]"
    OFS = ";"
    n = split("country id_client name num_tel mail birth", tags, / /)
    # Header row: OFS between names, ORS after the last one.
    for (i = 1; i <= n; i++) {
        printf "%s%s", tags[i], (i < n ? OFS : ORS)
    }
}

# Entering a <data> element: start from a clean slate so that a tag missing
# from this element cannot inherit the previous element's value.
/<data>/ {
    inData = 1
    delete dataVal
}

# A complete "<tag>value</tag>" line. Keep record-level and data-level values
# apart so the data-level ones can be reset without losing e.g. the country.
NF == 5 {
    if (inData) {
        dataVal[$2] = $3
    } else {
        recVal[$2] = $3
    }
}

# End of a <data> element: emit one row. A data-level value wins over a
# record-level one; a tag seen in neither prints as an empty column.
/<\/data>/ {
    for (i = 1; i <= n; i++) {
        tag = tags[i]
        if (tag in dataVal) {
            val = dataVal[tag]
        } else if (tag in recVal) {
            val = recVal[tag]
        } else {
            val = ""
        }
        printf "%s%s", val, (i < n ? OFS : ORS)
    }
    inData = 0
    delete dataVal
}

# End of a record: drop its record-level values. The pattern tolerates sloppy
# closing tags such as "</ record>" or a missing final ">".
/<\/ *record/ { delete recVal }

$ awk -f tst.awk file
country;id_client;name;num_tel;mail;birth
US;50C;toto;001;;01/30/008
US;100K;toto2;;;01/30/011
China;99E;toto3;;3@mail.com;01/30/0008
China;77B;toto4;004;;2001/05/01