awk根据条件匹配两个文件并在新文件中打印输出

时间:2014-03-10 13:04:51

标签: awk

我有 2 个文件:alpha1.csv 和 SPAM1.csv

$ cat alpha1.csv  
AKTEL_BANGLADESH,BANGLADESH,Alphanumeric_A_MSISDN_blocking,1095   
ALJAWAL_SAUDI_TELECOM_COMPANY,SAUDI_ARABIA,Alphanumeric_A_MSISDN_blocking,9592  
B-MOBILE_BRUNEI,BRUNEI,Alphanumeric_A_MSISDN_blocking,3  


$ cat SPAM1.csv  
AIN_AIS_GLOBAL_COMMUNICATIONS,THAILAND,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),1  
AKTEL_BANGLADESH,BANGLADESH,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),16  
ALJAWAL_SAUDI_TELECOM_COMPANY,SAUDI_ARABIA,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),10593  
AT&T_WIRELESS,UNITED_STATES,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),218  
BANGLALINK_SHEBA_BANGLADESH,BANGLADESH,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),111  

预期产出:

AIN_AIS_GLOBAL_COMMUNICATIONS,THAILAND,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),1,NA,NA  
AKTEL_BANGLADESH,BANGLADESH,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),16,Alphanumeric_A_MSISDN_blocking,1095  
ALJAWAL_SAUDI_TELECOM_COMPANY,SAUDI_ARABIA,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),10593,Alphanumeric_A_MSISDN_blocking,9592  
AT&T_WIRELESS,UNITED_STATES,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),218,NA,NA  
BANGLALINK_SHEBA_BANGLADESH,BANGLADESH,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),111,NA,NA  
B-MOBILE_BRUNEI,BRUNEI,NA,NA,Alphanumeric_A_MSISDN_blocking,3  

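我的命令只输出了文件 2(SPAM1.csv)中的各行(匹配的行附加 alpha1.csv 的数据,不匹配的行附加 NA,NA),却没有输出只出现在 alpha1.csv 中、在 SPAM1.csv 里找不到匹配的行(例如 B-MOBILE_BRUNEI):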

$ awk 'BEGIN{FS=OFS=","} FNR==NR {a[$1,$2]=$3 FS $4; next} {print $0, (i=a[$1,$2]?a[$1,$2]:"NA,NA")}' alpha1.csv SPAM1.csv  
AIN_AIS_GLOBAL_COMMUNICATIONS,THAILAND,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),1,NA,NA  
AKTEL_BANGLADESH,BANGLADESH,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),16,Alphanumeric_A_MSISDN_blocking,1095  
ALJAWAL_SAUDI_TELECOM_COMPANY,SAUDI_ARABIA,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),10593,Alphanumeric_A_MSISDN_blocking,9592  
AT&T_WIRELESS,UNITED_STATES,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),218,NA,NA  
BANGLALINK_SHEBA_BANGLADESH,BANGLADESH,SPAM_CHAIN_SMS_REJECT(Spam_Detection_and_Blocking),111,NA,NA  

1 个答案:

答案 0 :(得分:1)

沿用你现有的代码稍作修改即可。下面给出两种等价的写法:第一种用 FS 拼接出数组的键,第二种使用 awk 的多维下标(SUBSEP):

# Full outer join of two comma-separated files on columns 1 and 2.
#   - every row of the second file is printed, followed by columns 3-4 of
#     the matching first-file row, or by NA,NA when there is no match;
#   - first-file rows whose key never appears in the second file are then
#     printed as key,NA,NA,col3,col4 (in unspecified order).
awk '
BEGIN { FS = OFS = "," }

# First file: remember columns 3-4 under the "col1,col2" key.
# FILENAME == ARGV[1] is used instead of FNR == NR because the latter is
# also true for the second file whenever the first file is empty.
FILENAME == ARGV[1] {
    a[$1 FS $2] = $3 FS $4
    next
}

# Second file: append the stored columns, or NA,NA if the key is unknown.
# Matched keys are recorded in seen[] rather than deleted from a[], so a
# key that occurs on several rows of the second file matches every time.
{
    key = $1 FS $2
    print $0, (key in a ? a[key] : "NA,NA")
    seen[key] = 1
}

# Finally emit the first-file rows that were never matched.
END {
    for (left in a)
        if (!(left in seen))
            print left, "NA,NA", a[left]
}' alpha spam

# Same full outer join on columns 1 and 2, written with awk multi-subscript
# array keys (joined internally with SUBSEP) instead of an FS-joined string.
#   - every row of the second file is printed, followed by columns 3-4 of
#     the matching first-file row, or by NA,NA when there is no match;
#   - first-file rows whose key never appears in the second file are then
#     printed as col1,col2,NA,NA,col3,col4 (in unspecified order).
awk '
BEGIN { FS = OFS = "," }

# First file: store columns 3-4 under the (col1, col2) key.
# FILENAME == ARGV[1] is used instead of FNR == NR because the latter is
# also true for the second file whenever the first file is empty.
FILENAME == ARGV[1] {
    a[$1, $2] = $3 FS $4
    next
}

# Second file: append the stored columns, or NA,NA if the key is unknown.
# Matched keys are recorded in seen[] rather than deleted from a[], so a
# key that occurs on several rows of the second file matches every time.
{
    print $0, (($1, $2) in a ? a[$1, $2] : "NA,NA")
    seen[$1, $2] = 1
}

# Finally emit the first-file rows that were never matched; the combined
# key has to be split on SUBSEP to get the two columns back.
END {
    for (left in a) {
        if (left in seen)
            continue
        split(left, tmp, SUBSEP)
        print tmp[1], tmp[2], "NA,NA", a[left]
    }
}' alpha spam