awk在两个文件之间提取标签

时间:2017-07-26 12:21:43

标签: awk

下面的 awk 可以按原样执行,并生成“当前输出”。我想增加一个条件:对 file1 中与 file2 进行比较的每一行,提取标签 `AF=`、`FR=`、`HRUN=`、`LEN=`、`TYPE=` 之后的文本或值。两个文件之间的行被分为 `Match`、`Missing in file1` 和 `Missing in file2` 三类,但我无法加上这个条件来提取以 `;`(分号)分隔的标签。标签后面不一定总有文本,但它们始终以 `;` 结尾。另外,`$6` 中的小数保留 3 位有效数字,以便于阅读。看起来已经很接近了,但有几处我不太清楚该怎么做。谢谢 :)。

文件1

chr1    43814978    COSM27286    G    A    86.92679999999999    PASS    
AF=0;AO=1;DP=5535;FAO=0;FR=.,REALIGNEDx0.008;HRUN=1;LEN=1;TYPE=snp;VARB=0;HS; 
chr1    43814981    COSM27287    G    A    86.83350000000002    PASS    
AF=0;AO=2;DP=5556;FAO=0;FR=.;HRUN=1;LEN=1;TYPE=snp;VARB=0;HS;
chr1    43815008    COSM29008;COSM43212    TGG    AAA,AAG    70.3099    PASS        
AF=0,0;AO=0,0;DP=5528;FAO=0,0;FR=.,.,;HRUN=1,1;LEN=3,2,;TYPE=mnp,mnp;VARB=0,0;HS;

文件2

chr1    43814979    COSM27286    G    A    86.92679999999999    PASS    
AF=0;AO=1;DP=5535;FAO=0;FR=.,REALIGNEDx0.008;HRUN=1;LEN=1;TYPE=snp;VARB=0;HS; 
chr1    43814981    COSM27287    G    A    86.83350000000002    PASS    
AF=0;AO=2;DP=5556;FAO=0;FR=.;HRUN=1;LEN=1;TYPE=snp;VARB=0;HS;
chr1    43815008    COSM29008;COSM43212    TGG    AAA,AAG    70.3099    PASS        
AF=0,0;AO=0,0;DP=5528;FAO=0,0;FR=.,.,;HRUN=1,1;LEN=3,2,;TYPE=mnp,mnp;VARB=0,0;HS;

所需的输出

Match:
chr1    43814981    COSM27287    G    A    86.8    PASS    
AF=0;FR=.;HRUN=1;LEN=1;TYPE=snp
chr1    43815008    COSM29008;COSM43212    TGG    AAA,AAG    70.3099    PASS        
AF=0,0;FR=.,.,;HRUN=1,1;LEN=3,2,;TYPE=mnp,mnp
Missing in file1:
chr1    43814979    COSM27286    G    A    86.9    PASS    
AF=0;FR=.,REALIGNEDx0.008;HRUN=1;LEN=1;TYPE=snp
Missing in file2:
chr1    43814978    COSM27286    G    A    86.9    PASS    
AF=0;FR=.,REALIGNEDx0.008;HRUN=1;LEN=1;TYPE=snp

awk

# Compare file1 and file2 on their first seven fields and write a three-part
# report (Match / Missing in file1 / Missing in file2) to the file "output".
# The first line of each input file is skipped.
awk '
  # Skip line 1 of every input file.
  FNR == 1 { next }

  # Store fields 1-7, joined by single spaces, under a key built from the
  # same seven fields. FNR == NR holds only while the first file is read.
  {
    row = $1 " " $2 " " $3 " " $4 " " $5 " " $6 " " $7
    if (FNR == NR) {
      first_rows[$1,$2,$3,$4,$5,$6,$7] = row
    } else {
      second_rows[$1,$2,$3,$4,$5,$6,$7] = row
    }
  }

  END {
    # Keys present in both files (the stored text is the same in either array).
    print "Match:"
    for (key in first_rows) {
      if (key in second_rows) {
        print first_rows[key]
      }
    }

    # Keys that occur only in the second file.
    print "Missing in file1:"
    for (key in second_rows) {
      if (!(key in first_rows)) {
        print second_rows[key]
      }
    }

    # Keys that occur only in the first file.
    print "Missing in file2:"
    for (key in first_rows) {
      if (!(key in second_rows)) {
        print first_rows[key]
      }
    }
  }
' file1 file2 > output

1 个答案:

答案 0 :(得分:1)

尝试:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  NOTE(review): this snippet is a URL-redirect configuration and does not
  appear to relate to the surrounding awk question; it may have been pasted
  here by mistake. Presumably an IIS URL Rewrite (web.config) rule: confirm.
-->
<configuration>
    <system.webServer>
         <rewrite>
             <rules>
                <!-- Single enabled rule; stopProcessing is set to true. -->
                <rule name="Redirect blog to new url" enabled="true" stopProcessing="true">
                    <!-- The URL pattern ".*" matches any URL. -->
                    <match url=".*" />
                    <!-- All listed conditions must hold (logicalGrouping="MatchAll"). -->
                    <conditions logicalGrouping="MatchAll">
                        <!-- Applies only when the host contains blog.example.com. -->
                        <add input="{HTTP_HOST}" pattern=".*blog\.example\.com.*" />
                    </conditions>
                    <!-- Redirect target; {R:0} is presumably the text matched by the rule pattern. -->
                    <action type="Redirect" url="https://example.com/blog/{R:0}" />
                </rule>
             </rules>
         </rewrite>
    </system.webServer>
</configuration>

输出如下。

# Classify the records of two files by the composite key ($1, $2, $7) and
# print one report to stdout:
#   Match:             key present in both files (the file1 record is printed)
#   Missing in File1:  key present only in Input_file2
#   Missing in File2:  key present only in Input_file1
# Only fields 1-7 of a record are printed, joined with FS.
awk '
# First file: remember fields 1-7 of every record under its key.
FNR == NR {
    a[$1,$2,$7] = $1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7
    next
}

# Second file, key also seen in the first file: collect the match and drop
# the key, so that only unmatched file1 keys are left in a[] at END.
(($1,$2,$7) in a) {
    val_match = val_match ? val_match ORS a[$1,$2,$7] : a[$1,$2,$7]
    delete a[$1,$2,$7]
    next
}

# Second file, key not (or no longer) present in a[].
{
    rec = $1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7
    val_mismatch_in_file1 = val_mismatch_in_file1 ? val_mismatch_in_file1 ORS rec : rec
}

END {
    # Append every leftover file1 record. The previous version assigned a[i]
    # in both branches of the conditional, so each iteration overwrote the
    # list and only a single record was ever reported.
    for (i in a) {
        val_missing_in_file2 = val_missing_in_file2 ? val_missing_in_file2 ORS a[i] : a[i]
    }
    print "Match:" RS val_match RS "Missing in File1:" RS val_mismatch_in_file1 RS "Missing in File2:" RS val_missing_in_file2
}
' Input_file1 Input_file2