我想比较 TSV 文件中的第三列和第五列,这两列各包含一组以空格分隔的数字,并希望用 bash(awk)脚本把两列中共有的数字打印到新的一列中。为此,我为这两列分别创建了数组,以便逐一比较并打印匹配的数字;如果第三列和第五列之间没有任何匹配,则打印“not matches”(不匹配)。但是下面的代码无法正常工作。我也参考过类似的脚本,但仍然没有成功。输入(各列以制表符分隔)类似于:
rs935 FUCA 6 29 61 63 67 70 133 134 150 159 166 208 220 260 V 260 135 49 159
rs699 AGT 10 113 117 255 263 288 312 369 381 M 268
rs4230 CFHR1 12 69 85 119 132 141 161 171 173 177 248 252 270 281 291 302 305 309 314 R 302 237
所需的输出应为:
rs935 FUCA V:159-260
rs699 AGT M:not matches
rs4230 CFHR1 R:302
......等等
这是代码:
# Asker's attempt: for each record of input.tsv, collect the values of field 5
# as keys of array a, then append every field-3 value that is a key of a to
# the string both, and print fields 1, 2 and "field4:both" to output.tsv.
awk 'BEGIN {FS=OFS="\t"}
# NOTE(review): there is no ";" or newline between "delete a" and "n=split(...)"
# on the next line, so the two statements run together.
{delete a n=split($3,a3," ");
m=split($5,a5," ");
# Referencing a[key] without assigning is enough to create the key.
for(i=1;i<=m;i++) a[a5[i]];
# NOTE(review): SEP starts as a newline, so the first match appended to both
# is preceded by "\n"; later matches are joined with "-".
SEP="\n"
# NOTE(review): both is never cleared between records, and nothing here emits
# "not matches" when no value is shared.
for (i=1;i<=n;i++) {if(a3[i] in a) {both=both SEP a3[i]; SEP="-"}
# NOTE(review): counting braces, the closing brace below leaves one opening
# brace unmatched (either the for loop or the main action is never closed).
print $1,$2,$4 ":" both }' "input.tsv" > "output.tsv"
如何在 Unix 环境中实现这一点?提前致谢。
答案 0 :(得分:0)
您的问题非常不明确,但这是您正在尝试做的事情吗?
$ cat tst.awk
# tst.awk -- for every tab-separated record, report which of the
# space-separated values in field 5 also occur in field 3.
#
# Input : TSV records; fields 3 and 5 each hold a space-separated list.
# Output: $1, $2 and "$4:<matches>" joined by tabs, where <matches> is the
#         shared values joined by "-" in the order they appear in field 5,
#         or the text "not matches" when the two lists share no value.
BEGIN { FS = OFS = "\t" }
{
    # Arrays persist across records, so drop the previous record's lookup set.
    delete a3values

    # split() with " " splits on runs of blanks and ignores leading/trailing
    # blanks, so doubled or trailing spaces never produce empty list entries
    # that could spuriously match each other.
    n3 = split($3, a3list, " ")
    for (i = 1; i <= n3; i++) {
        a3values[a3list[i]]     # a bare reference is enough to create the key
    }

    # Walk field 5 by numeric index: the traversal order of "for (k in arr)"
    # is unspecified, which would let the order of the joined matches vary
    # between awk implementations.
    n5 = split($5, a5list, " ")
    both = ""
    for (i = 1; i <= n5; i++) {
        a5value = a5list[i]
        if (a5value in a3values) {
            both = (both == "" ? "" : both "-") a5value
        }
    }

    # An empty accumulator means no shared value was found.
    print $1, $2, $4 ":" (both == "" ? "not matches" : both)
}
$ awk -f tst.awk file
rs935 FUCA V:260-159
rs699 AGT M:not matches
rs4230 CFHR1 R:302