我有一些文本文件。第四列表示蛋白质残基,第五列表示该残基的类型:C 表示线圈(coil),T 表示转角(turn),S 表示弯曲(bend)。我想计算每个文件中 C + T + S 的平均值(即所占比例),公式为:(C + T + S 的总数)/(残基总数)。
file1.txt
311 446 E L C 0 0 78 0, 0.0 -239,-0.1 0, 0.0 0, 0.0 0.000 360.0 360.0 360.0-115.3 -6.6 19.5 -28.9
312 447 E K C + 0 0 109 1,-0.2 -240,-0.0 -239,-0.0 0, 0.0 0.747 360.0 90.4 57.7 22.2 -8.5 22.8 -28.5
313 448 E K S S+ 0 0 194 2,-0.0 -1,-0.2 0, 0.0 0, 0.0 -0.107 72.2 141.1-120.6 7.9 -10.0 21.6 -31.8
314 449 E Q C - 0 0 28 1,-0.1 2,-0.6 15,-0.0 -242,-0.1 -0.216 56.0-123.3 -57.3 138.8 -12.1 20.3 -28.9
315 450 E V E +K 327 0B 82 12,-0.8 12,-2.9 2,-0.0 2,-0.4 -0.786 33.3 172.1 -96.1 120.7 -15.8 20.2 -29.2
316 451 E E E +K 326 0B 83 -2,-0.6 10,-0.2 10,-0.2 2,-0.2 -0.987 1.7 163.8-128.9 140.8 -18.0 22.0 -26.7
317 452 E T E -K 325 0B 68 8,-2.3 8,-2.5 -2,-0.4 2,-0.4 -0.656 37.5 -96.6-133.1-171.0 -21.7 22.6 -26.7
318 453 E R E -K 324 0B 158 6,-0.2 6,-0.2 -2,-0.2 4,-0.0 -0.949 35.1-119.5-118.2 134.9 -24.4 23.6 -24.2
319 454 E T C > - 0 0 29 4,-2.8 3,-2.7 -2,-0.4 4,-0.1 -0.217 43.2 -90.7 -65.2 165.3 -26.6 21.2 -22.4
320 455 E A T 3 S+ 0 0 122 1,-0.3 -1,-0.1 2,-0.1 -2,-0.0 0.771 132.2 45.8 -47.6 -27.4 -30.3 21.4 -23.0
321 456 E D T 3 S- 0 0 122 2,-0.0 -1,-0.3 0, 0.0 -2,-0.1 -0.032 128.4 -91.1-110.0 30.5 -30.3 23.7 -20.0
322 457 E G S < S+ 0 0 47 -3,-2.7 2,-0.3 1,-0.2 -2,-0.1 0.723 74.9 143.9 73.3 25.7 -27.3 25.9 -20.9
file2.txt
140 361 B G C 0 0 53 0, 0.0 2,-0.2 0, 0.0 -53,-0.1 0.000 360.0 360.0 360.0 97.3 25.2 -16.6 -6.6
141 362 B A C - 0 0 70 -55,-0.1 -56,-0.0 1,-0.0 0, 0.0 -0.649 360.0 -33.9-148.3 84.1 28.0 -18.6 -4.8
142 363 B R C - 0 0 224 -2,-0.2 -43,-0.1 1,-0.1 -55,-0.1 1.000 68.7-149.8 66.4 76.9 31.1 -16.5 -4.0
143 364 B A C + 0 0 25 -57,-0.1 2,-0.3 -43,-0.1 -43,-0.2 -0.357 27.9 158.9 -70.8 155.7 31.6 -13.9 -6.7
144 365 B H B -K 99 0B 51 -45,-2.7 -45,-2.6 -2,-0.0 2,-0.1 -0.947 40.1 -66.6-163.2-179.4 35.1 -12.8 -7.5
145 366 B S C 0 0 40 -2,-0.3 -47,-0.2 -47,-0.2 -44,-0.1 -0.467 360.0 360.0 -80.5 153.0 37.6 -11.3 -10.0
146 367 B S C 0 0 74 -2,-0.1 -1,-0.1 -46,-0.1 -48,-0.1 0.787 360.0 360.0 -70.5 360.0 38.5 -13.0 -13.3
file3.txt
256 1417 C T E +f 129 0C 82 -2,-0.4 2,-0.3 -128,-0.2 -126,-0.2 -0.911 15.9 171.9-119.8 151.1 22.7 -9.3 39.1
257 1418 C V E -f 130 0C 74 -128,-1.7 -126,-2.3 -2,-0.4 2,-0.3 -0.968 27.7-119.0-148.2 152.8 22.9 -13.0 38.4
258 1419 C K E -f 131 0C 147 -2,-0.3 2,-0.5 -128,-0.2 -126,-0.2 -0.752 10.2-157.2-104.3 149.0 20.7 -15.8 39.6
259 1420 C E E -f 132 0C 18 -128,-2.8 -126,-1.8 -2,-0.3 -125,-0.5 -0.987 9.4-178.4-118.2 119.5 21.4 -18.8 41.8
期望的输出
file1 8/12 = 0.67
file2 6/7 = 0.86
file3 0/4 = 0.00
答案 0 :(得分:2)
使用awk
# Report, for every input file, the fraction of residues whose type
# (column 5) is C, T or S:  (number of C+T+S lines) / (number of residue lines).
awk '
# Record each file the first time it is seen, so the report follows the
# command-line order; "for (k in array)" iterates in an unspecified order.
FNR == 1 && !(FILENAME in seen) { seen[FILENAME] = 1; order[++nfiles] = FILENAME }
# Blank lines (e.g. a trailing empty line) are not residues; do not count them.
NF == 0 { next }
{ total[FILENAME]++ }
# Anchored match: the field must be exactly C, T or S, not merely contain one.
$5 ~ /^[CTS]$/ { hits[FILENAME]++ }
END {
    for (k = 1; k <= nfiles; k++) {
        f = order[k]
        # A file holding only blank lines has no residues; print 0 rather than divide by zero.
        ratio = total[f] ? hits[f] / total[f] : 0
        printf "%s %d/%d = %.2f\n", f, hits[f], total[f], ratio
    }
}' file*.txt
file1.txt 8/12 = 0.67
file2.txt 6/7 = 0.86
file3.txt 0/4 = 0.00