我正在尝试使用 awk 在 input 中查找关键字，并在找到时打印指定的字段。下面的 awk 可以运行，但不会产生所需的输出。期望的行为是：如果在某一行中找到 TYPE=ins 或 TYPE=del，则打印 $1、$2、$4、$5 以及 LEN= 字段。LEN= 也是该行中的一个字段，其中 = 后面跟着一个数字。谢谢你 :)。
输入
chr1 1647893 . C CTTTCTT 31.9545 PASS AF=0.330827;AO=179;DP=695;FAO=132;FDP=399;FR=.;FRO=267;FSAF=67;FSAR=65;FSRF=124;FSRR=143;FWDB=0.0145873;FXX=0.00249994;HRUN=1;LEN=6;MLLD=190.481;OALT=TTTCTT;OID=.;OMAPALT=CTTTCTT;OPOS=1647894;OREF=-;PB=0.5;PBP=1;QD=0.320346;RBI=0.0146526;REFB=-0.0116875;REVB=0.00138131;RO=471;SAF=85;SAR=94;SRF=236;SRR=235;SSEN=0;SSEP=0;SSSB=-0.0324817;STB=0.528856;STBP=0.43;TYPE=ins;VARB=0.0222858 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:31:695:399:471:267:179:132:0.330827:94:85:236:235:65:67:124:143
chr1 1650787 . T C 483.012 PASS AF=0.39;AO=181;DP=459;FAO=156;FDP=400;FR=.;FRO=244;FSAF=100;FSAR=56;FSRF=162;FSRR=82;FWDB=-0.00931067;FXX=0;HRUN=1;LEN=1;MLLD=210.04;OALT=C;OID=.;OMAPALT=C;OPOS=1650787;OREF=T;PB=0.5;PBP=1;QD=4.83012;RBI=0.018986;REFB=-0.0114993;REVB=-0.0165463;RO=276;SAF=116;SAR=65;SRF=184;SRR=92;SSEN=0;SSEP=0;SSSB=-0.0305478;STB=0.515311;STBP=0.652;TYPE=snp;VARB=0.019956 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:483:459:400:276:244:181:156:0.39:65:116:184:92:56:100:162:82
chr1 17034455 . CGCGCGCGT C 50 PASS AF=0.205882;AO=56;DP=272;FR=.;LEN=8;OALT=-;OID=.;OMAPALT=C;OPOS=17034456;OREF=GCGCGCGT;RO=216;SAF=27;SAR=29;SRF=112;SRR=104;TYPE=del GT:GQ:DP:RO:AO:SAF:SAR:SRF:SRR:AF 0/1:99:272:216:56:27:29:112:104:0.205882
AWK
# Asker's non-working attempt: "/TYPE=*/" and "/LEN=*/" are quoted string
# literals, so awk prints that text verbatim (concatenated with the whole
# record $0) instead of extracting the TYPE=/LEN= values. The trailing `1`
# pattern additionally prints every line that matched neither rule unchanged.
awk '/TYPE=ins/ {print $1,$2,$4,$5, "/TYPE=*/" "/LEN=*/" $0;next} /TYPE=del/ {print $1,$2,$4,$5, "/TYPE=*/" "/LEN=*/" $0;next} 1' input > out
所需的输出
chr1 1647893 C CTTTCTT TYPE=ins LEN=6
chr1 17034455 CGCGCGCGT C TYPE=del LEN=8
答案 0 :(得分:1)
这是一个awk解决方案:
# For every record in `input` whose 8th whitespace-separated field (the
# semicolon-separated KEY=value list in the sample data) contains TYPE=ins or
# TYPE=del, print fields 1, 2, 4 and 5 followed by its TYPE= and LEN= entries.
awk '
# Match TYPE only as a whole key inside field 8, not anywhere in the record.
$8 ~ /(^|;)TYPE=(ins|del)/ {
    # Split field 8 only: splitting $0 on ";" would glue the columns that
    # follow field 8 onto the last entry (e.g. "TYPE=del GT:GQ:... 0/1:...").
    n = split($8, info, ";")
    len = ""
    type = ""
    for (i = 1; i <= n; i++) {
        # Anchor at the start so keys that merely end in LEN/TYPE are ignored.
        if (info[i] ~ /^LEN=/)  len = info[i]
        if (info[i] ~ /^TYPE=/) type = info[i]
    }
    print $1, $2, $4, $5, type, len
}' input
输出:
chr1 1647893 C CTTTCTT TYPE=ins LEN=6
chr1 17034455 CGCGCGCGT C TYPE=del LEN=8
答案 1 :(得分:1)
您可以使用此awk命令:
# For every record in `file` containing TYPE=ins or TYPE=del, print fields
# 1, 2, 4 and 5 followed by the TYPE=... and LEN=... substrings of the record.
awk '
# find(str): return the first "<str>=value" substring of the current record,
# where value runs up to the next ";", space or tab; returns "" if absent.
function find(str) {
    # Run match() as its own step so RSTART/RLENGTH are set before substr()
    # reads them; awk does not guarantee the order in which function-call
    # arguments are evaluated.
    if (match($0, str "=[^; \t]+"))
        return substr($0, RSTART, RLENGTH)
    return ""
}
/TYPE=(ins|del)/ {
    print $1, $2, $4, $5, find("TYPE"), find("LEN")
}' file
**输出：**
chr1 1647893 C CTTTCTT TYPE=ins LEN=6
chr1 17034455 CGCGCGCGT C TYPE=del LEN=8