使用bash脚本转置特定列

时间:2014-09-09 22:02:14

标签: bash awk sed transpose

我有一个包含以下模式的大文件(在文件中重复了几千次):

#
# Output from 'compseq'

# The Expected frequencies are calculated on the (false) assumption that every
# word has equal frequency.
#
# The input sequences are:
#   s21_contig00001


Word size   4
Total count 49466

#
# Word  Obs Count   Obs Frequency   Exp Frequency   Obs/Exp Frequency
#
AAAA    573 0.0115837   0.0039062   2.9654308
AAAC    301 0.0060850   0.0039062   1.5577568
AAAG    305 0.0061659   0.0039062   1.5784579
AAAT    345 0.0069745   0.0039062   1.7854688
AACA    227 0.0045890   0.0039062   1.1747867
AACC    113 0.0022844   0.0039062   0.5848057
AACG    321 0.0064893   0.0039062   1.6612623
AACT    109 0.0022035   0.0039062   0.5641046
AAGA    222 0.0044879   0.0039062   1.1489104
AAGC    339 0.0068532   0.0039062   1.7544172
AAGG    196 0.0039623   0.0039062   1.0143533
AAGT    169 0.0034165   0.0039062   0.8746210
AATA    129 0.0026079   0.0039062   0.6676101
AATC    226 0.0045688   0.0039062   1.1696115
AATG    286 0.0057817   0.0039062   1.4801278
AATT    196 0.0039623   0.0039062   1.0143533
ACAA    211 0.0042656   0.0039062   1.0919824
ACAC    91  0.0018396   0.0039062   0.4709497
ACAG    103 0.0020822   0.0039062   0.5330530
ACAT    167 0.0033761   0.0039062   0.8642704
ACCA    80  0.0016173   0.0039062   0.4140218
ACCC    72  0.0014555   0.0039062   0.3726196
ACCG    217 0.0043869   0.0039062   1.1230340
ACCT    52  0.0010512   0.0039062   0.2691141
ACGA    322 0.0065095   0.0039062   1.6664376
ACGC    201 0.0040634   0.0039062   1.0402297
ACGG    252 0.0050944   0.0039062   1.3041685
ACGT    202 0.0040836   0.0039062   1.0454049
ACTA    35  0.0007076   0.0039062   0.1811345
ACTC    75  0.0015162   0.0039062   0.3881454
ACTG    87  0.0017588   0.0039062   0.4502487
ACTT    169 0.0034165   0.0039062   0.8746210
AGAA    158 0.0031941   0.0039062   0.8176930
AGAC    91  0.0018396   0.0039062   0.4709497
AGAG    100 0.0020216   0.0039062   0.5175272
AGAT    84  0.0016981   0.0039062   0.4347228
AGCA    230 0.0046497   0.0039062   1.1903125
AGCC    185 0.0037399   0.0039062   0.9574253
AGCG    350 0.0070756   0.0039062   1.8113452
AGCT    218 0.0044071   0.0039062   1.1282093
AGGA    144 0.0029111   0.0039062   0.7452392
AGGC    148 0.0029920   0.0039062   0.7659402
AGGG    109 0.0022035   0.0039062   0.5641046
AGGT    52  0.0010512   0.0039062   0.2691141
AGTA    64  0.0012938   0.0039062   0.3312174
AGTC    88  0.0017790   0.0039062   0.4554239
AGTG    105 0.0021227   0.0039062   0.5434035
AGTT    109 0.0022035   0.0039062   0.5641046
ATAA    136 0.0027494   0.0039062   0.7038370
ATAC    100 0.0020216   0.0039062   0.5175272
ATAG    64  0.0012938   0.0039062   0.3312174
ATAT    154 0.0031132   0.0039062   0.7969919
ATCA    242 0.0048922   0.0039062   1.2524158
ATCC    172 0.0034771   0.0039062   0.8901468
ATCG    431 0.0087131   0.0039062   2.2305422
ATCT    84  0.0016981   0.0039062   0.4347228
ATGA    311 0.0062871   0.0039062   1.6095096
ATGC    230 0.0046497   0.0039062   1.1903125
ATGG    213 0.0043060   0.0039062   1.1023329
ATGT    167 0.0033761   0.0039062   0.8642704
ATTA    110 0.0022237   0.0039062   0.5692799
ATTC    166 0.0033558   0.0039062   0.8590951
ATTG    216 0.0043666   0.0039062   1.1178587
ATTT    345 0.0069745   0.0039062   1.7854688
CAAA    392 0.0079246   0.0039062   2.0287066
CAAC    206 0.0041645   0.0039062   1.0661060
CAAG    272 0.0054987   0.0039062   1.4076740
CAAT    216 0.0043666   0.0039062   1.1178587
CACA    81  0.0016375   0.0039062   0.4191970
CACC    131 0.0026483   0.0039062   0.6779606
CACG    139 0.0028100   0.0039062   0.7193628
CACT    105 0.0021227   0.0039062   0.5434035
CAGA    57  0.0011523   0.0039062   0.2949905
CAGC    303 0.0061254   0.0039062   1.5681074
CAGG    67  0.0013545   0.0039062   0.3467432
CAGT    87  0.0017588   0.0039062   0.4502487
CATA    127 0.0025674   0.0039062   0.6572595
CATC    326 0.0065904   0.0039062   1.6871386
CATG    182 0.0036793   0.0039062   0.9418995
CATT    286 0.0057817   0.0039062   1.4801278
CCAA    215 0.0043464   0.0039062   1.1126835
CCAC    87  0.0017588   0.0039062   0.4502487
CCAG    100 0.0020216   0.0039062   0.5175272
CCAT    213 0.0043060   0.0039062   1.109
CCCA    106 0.0021429   0.0039062   0.5485788
CCCC    135 0.0027291   0.0039062   0.6917
CCCG    212 0.0042858   0.0039062   1.096
CCCT    109 0.0022035   0.0039062   0.56
CCGA    276 0.0055796   0.0039062   1.42
CCGC    382 0.0077225   0.0039062   1.97
CCGG    294 0.0059435   0.0039062   1.521
CCGT    252 0.0050944   0.0039062   1.304
CCTA    36  0.0007278   0.0039062   0.1863098
CCTC    153 0.0030930   0.0039062   0.7918166
CCTG    67  0.0013545   0.0039062   0.3467432
CCTT    196 0.0039623   0.0039062   1.0143533
CGAA    328 0.0066308   0.0039062   1.6974892
CGAC    319 0.0064489   0.0039062   1.6509117
CGAG    241 0.0048720   0.0039062   1.2472405
CGAT    431 0.0087131   0.0039062   2.2305422
CGCA    247 0.0049933   0.0039062   1.2782922
CGCC    465 0.0094004   0.0039062   2.4065014
CGCG    358 0.0072373   0.0039062   1.8527473
CGCT    350 0.0070756   0.0039062   1.8113452
CGGA    283 0.0057211   0.0039062   1.4646019
CGGC    492 0.0099462   0.0039062   2.546
CGGG    212 0.0042858   0.0039062   1.09
CGGT    217 0.0043869   0.0039062   1.1230
CGTA    136 0.0027494   0.0039062   0.703
CGTC    381 0.0077023   0.0039062   1.971
CGTG    139 0.0028100   0.0039062   0.7193628
CGTT    321 0.0064893   0.0039062   1.6612623
CTAA    44  0.0008895   0.0039062   0.2220
CTAC    42  0.0008491   0.0039062   0.2173614
CTAG    12  0.0002426   0.0039062   0.063
CTAT    64  0.0012938   0.0039062   0.331
CTCA    131 0.0026483   0.0039062   0.676
CTCC    160 0.0032345   0.0039062   0.825
CTCG    241 0.0048720   0.0039062   1.2472405
CTCT    100 0.0020216   0.0039062   0.5175272
CTGA    143 0.0028909   0.0039062   0.74
CTGC    168 0.0033963   0.0039062   0.867
CTGG    100 0.0020216   0.0039062   0.51
CTGT    103 0.0020822   0.0039062   0.5330530
CTTA    61  0.0012332   0.0039062   0.3156916
CTTC    288 0.0058222   0.0039062   1.493
CTTG    272 0.0054987   0.0039062   1.4040
CTTT    305 0.0061659   0.0039062   1.5784579
GAAA    399 0.0080661   0.0039062   2.064
GAAC    211 0.0042656   0.0039062   1.024
GAAG    288 0.0058222   0.0039062   1.4904783
GAAT    166 0.0033558   0.0039062   0.8590951
GACA    188 0.0038006   0.0039062   0.9729511
GACC    132 0.0026685   0.0039062   0.6831359
GACG    381 0.0077023   0.0039062   1.9717786
GACT    88  0.0017790   0.0039062   0.4554239
GAGA    117 0.0023653   0.0039062   0.6055068
GAGC    287 0.0058020   0.0039062   1.4853030
GAGG    153 0.0030930   0.0039062   0.7918166
GAGT    75  0.0015162   0.0039062   0.384
GATA    137 0.0027696   0.0039062   0.709
GATC    240 0.0048518   0.0039062   1.242
GATG    326 0.0065904   0.0039062   1.6876
GATT    226 0.0045688   0.0039062   1.16
GCAA    344 0.0069543   0.0039062   1.785
GCAC    151 0.0030526   0.0039062   0.781
GCAG    168 0.0033963   0.0039062   0.867
GCAT    230 0.0046497   0.0039062   1.195
GCCA    260 0.0052561   0.0039062   1.307
GCCC    186 0.0037602   0.0039062   0.9006
GCCG    492 0.0099462   0.0039062   2.5438
GCCT    148 0.0029920   0.0039062   0.7602
GCGA    367 0.0074192   0.0039062   1.8993248
GCGC    470 0.0095015   0.0039062   2.4323778
GCGG    382 0.0077225   0.0039062   1.9769539
GCGT    201 0.0040634   0.0039062   1.0402297
GCTA    54  0.0010917   0.0039062   0.2794647
GCTC    287 0.0058020   0.0039062   1.4853030
GCTG    303 0.0061254   0.0039062   1.5681074
GCTT    339 0.0068532   0.0039062   1.7544172
GGAA    295 0.0059637   0.0039062   1.5267052
GGAC    138 0.0027898   0.0039062   0.7141875
GGAG    160 0.0032345   0.0039062   0.8280435
GGAT    172 0.0034771   0.0039062   0.8901468
GGCA    250 0.0050540   0.0039062   1.2938180
GGCC    186 0.0037602   0.0039062   0.9626006
GGCG    465 0.0094004   0.0039062   2.4065014
GGCT    185 0.0037399   0.0039062   0.9574253
GGGA    169 0.0034165   0.0039062   0.874610
GGGC    186 0.0037602   0.0039062   0.962606
GGGG    135 0.0027291   0.0039062   0.6986617
GGGT    72  0.0014555   0.0039062   0.372196
GGTA    45  0.0009097   0.0039062   0.2328872
GGTC    132 0.0026685   0.0039062   0.6831359
GGTG    131 0.0026483   0.0039062   0.6779606
GGTT    113 0.0022844   0.0039062   0.5848057
GTAA    93  0.0018801   0.0039062   0.4813003
GTAC    86  0.0017386   0.0039062   0.4450734
GTAG    42  0.0008491   0.0039062   0.2173614
GTAT    100 0.0020216   0.0039062   0.5175272
GTCA    241 0.0048720   0.0039062   1.2472405
GTCC    138 0.0027898   0.0039062   0.7141875
GTCG    319 0.0064489   0.0039062   1.6509117
GTCT    91  0.0018396   0.0039062   0.4709497
GTGA    127 0.0025674   0.0039062   0.6572595
GTGC    151 0.0030526   0.0039062   0.7814661
GTGG    87  0.0017588   0.0039062   0.4502487
GTGT    91  0.0018396   0.0039062   0.4709497
GTTA    52  0.0010512   0.0039062   0.2691141
GTTC    211 0.0042656   0.0039062   1.0919824
GTTG    206 0.0041645   0.0039062   1.0660
GTTT    301 0.0060850   0.0039062   1.558
TAAA    160 0.0032345   0.0039062   0.825
TAAC    52  0.0010512   0.0039062   0.261
TAAG    61  0.0012332   0.0039062   0.31
TAAT    110 0.0022237   0.0039062   0.569
TACA    76  0.0015364   0.0039062   0.397
TACC    45  0.0009097   0.0039062   0.23
TACG    136 0.0027494   0.0039062   0.70
TACT    64  0.0012938   0.0039062   0.33
TAGA    37  0.0007480   0.0039062   0.19
TAGC    54  0.0010917   0.0039062   0.2794647
TAGG    36  0.0007278   0.0039062   0.1863098
TAGT    35  0.0007076   0.0039062   0.1811345
TATA    60  0.0012130   0.0039062   0.3105163
TATC    137 0.0027696   0.0039062   0.7090123
TATG    127 0.0025674   0.0039062   0.6572595
TATT    129 0.0026079   0.0039062   0.6676101
TCAA    316 0.0063882   0.0039062   1.6353859
TCAC    127 0.0025674   0.0039062   0.6572595
TCAG    143 0.0028909   0.0039062   0.7400639
TCAT    311 0.0062871   0.0039062   1.6095096
TCCA    169 0.0034165   0.0039062   0.8746210
TCCC    169 0.0034165   0.0039062   0.8746210
TCCG    283 0.0057211   0.0039062   1.4646019
TCCT    144 0.0029111   0.0039062   0.7452392
TCGA    354 0.0071564   0.0039062   1.8320463
TCGC    367 0.0074192   0.0039062   1.8993248
TCGG    276 0.0055796   0.0039062   1.4283750
TCGT    322 0.0065095   0.0039062   1.6664376
TCTA    37  0.0007480   0.0039062   0.1914851
TCTC    117 0.0023653   0.0039062   0.6055068
TCTG    57  0.0011523   0.0039062   0.2949905
TCTT    222 0.0044879   0.0039062   1.1489104
TGAA    283 0.0057211   0.0039062   1.4646019
TGAC    241 0.0048720   0.0039062   1.2472405
TGAG    131 0.0026483   0.0039062   0.6779606
TGAT    242 0.0048922   0.0039062   1.2524158
TGCA    166 0.0033558   0.0039062   0.8590951
TGCC    250 0.0050540   0.0039062   1.2938180
TGCG    247 0.0049933   0.0039062   1.2782922
TGCT    230 0.0046497   0.0039062   1.1903125
TGGA    169 0.0034165   0.0039062   0.8746210
TGGC    260 0.0052561   0.0039062   1.3455707
TGGG    106 0.0021429   0.0039062   0.5485788
TGGT    80  0.0016173   0.0039062   0.4140218
TGTA    76  0.0015364   0.0039062   0.3933207
TGTC    188 0.0038006   0.0039062   0.9729511
TGTG    81  0.0016375   0.0039062   0.4191970
TGTT    227 0.0045890   0.0039062   1.1747867
TTAA    110 0.0022237   0.0039062   0.5692799
TTAC    93  0.0018801   0.0039062   0.4813003
TTAG    44  0.0008895   0.0039062   0.2277120
TTAT    136 0.0027494   0.0039062   0.7038370
TTCA    283 0.0057211   0.0039062   1.4646019
TTCC    295 0.0059637   0.0039062   1.5267052
TTCG    328 0.0066308   0.0039062   1.6974892
TTCT    158 0.0031941   0.0039062   0.8176930
TTGA    316 0.0063882   0.0039062   1.6353859
TTGC    344 0.0069543   0.0039062   1.7802935
TTGG    215 0.0043464   0.0039062   1.1126835
TTGT    211 0.0042656   0.0039062   1.0919824
TTTA    160 0.0032345   0.0039062   0.8280435
TTTC    399 0.0080661   0.0039062   2.0649335
TTTG    392 0.0079246   0.0039062   2.0287066
TTTT    573 0.0115837   0.0039062   2.9654308

Other   0   0.0000000   0.0000000   10000000000.0000000
#
# Output from 'compseq'
#
# The Expected frequencies are calculated on the (false) assumption that every
# word has equal frequency.
#
# The input sequences are:
#   s21_contig00002


Word size   4
Total count 29078

#
# Word  Obs Count   Obs Frequency   Exp Frequency   Obs/Exp Frequency
#
AAAA    364 0.0125181   0.0039062   3.2046221
AAAC    202 0.0069468   0.0039062   1.7783892
AAAG    170 0.0058463   0.0039062   1.4966641
AAAT    227 0.0078066   0.0039062   1.9984868
AACA    178 0.0061215   0.0039062   1.5670954
AACC    87  0.0029920   0.0039062   0.7659399
AACG    168 0.0057776   0.0039062   1.4790563
AACT    82  0.0028200   0.0039062   0.7219204
AAGA    146 0.0050210   0.0039062   1.2853704
AAGC    188 0.0064654   0.0039062   1.6551345
AAGG    142 0.0048834   0.0039062   1.2501548
AAGT    87  0.0029920   0.0039062   0.7659399
AATA    150 0.0051585   0.0039062   1.3205860
AATC    153 0.0052617   0.0039062   1.3469977
AATG    160 0.0055024   0.0039062   1.4086251
AATT    140 0.0048146   0.0039062   1.2325469
ACAA    183 0.0062934   0.0039062   1.6111149
ACAC    72  0.0024761   0.0039062   0.6338813
ACAG    92  0.0031639   0.0039062   0.8099594
ACAT    122 0.0041956   0.0039062   1.0740766
ACCA    71  0.0024417   0.0039062   0.6250774
ACCC    46  0.0015820   0.0039062   0.4049797
ACCG    122 0.0041956   0.0039062   1.0740766
ACCT    42  0.0014444   0.0039062   0.3697641
ACGA    138 0.0047459   0.0039062   1.2149391
ACGC    89  0.0030607   0.0039062   0.7835477
ACGG    102 0.0035078   0.0039062   0.8979985
ACGT    82  0.0028200   0.0039062   0.7219204
ACTA    40  0.0013756   0.0039062   0.3521563
ACTC    46  0.0015820   0.0039062   0.4049797
ACTG    64  0.0022010   0.0039062   0.5634500
ACTT    87  0.0029920   0.0039062   0.7659399
AGAA    140 0.0048146   0.0039062   1.2325469
AGAC    56  0.0019259   0.0039062   0.4930188
AGAG    61  0.0020978   0.0039062   0.5370383
AGAT    77  0.0026481   0.0039062   0.6779008
AGCA    145 0.0049866   0.0039062   1.2765665
AGCC    103 0.0035422   0.0039062   0.9068024
AGCG    170 0.0058463   0.0039062   1.4966641
AGCT    86  0.0029576   0.0039062   0.7571360
AGGA    118 0.0040581   0.0039062   1.0388610
AGGC    91  0.0031295   0.0039062   0.8011555
AGGG    84  0.0028888   0.0039062   0.7395282
AGGT    42  0.0014444   0.0039062   0.3697641
AGTA    47  0.0016163   0.0039062   0.4137836
AGTC    46  0.0015820   0.0039062   0.4049797
AGTG    62  0.0021322   0.0039062   0.5458422
AGTT    82  0.0028200   0.0039062   0.7219204
ATAA    120 0.0041268   0.0039062   1.0564688
ATAC    86  0.0029576   0.0039062   0.7571360
ATAG    76  0.0026137   0.0039062   0.6690969
ATAT    170 0.0058463   0.0039062   1.4966641
ATCA    141 0.0048490   0.0039062   1.2413508
ATCC    117 0.0040237   0.0039062   1.0300571
ATCG    204 0.0070156   0.0039062   1.7959970
ATCT    77  0.0026481   0.0039062   0.6779008
ATGA    197 0.0067749   0.0039062   1.7343696
ATGC    122 0.0041956   0.0039062   1.0740766
ATGG    147 0.0050554   0.0039062   1.2941743
ATGT    122 0.0041956   0.0039062   1.0740766
ATTA    85  0.0029232   0.0039062   0.7483321
ATTC    153 0.0052617   0.0039062   1.3469977
ATTG    138 0.0047459   0.0039062   1.2149391
ATTT    227 0.0078066   0.0039062   1.9984868
CAAA    234 0.0080473   0.0039062   2.0601142
CAAC    136 0.0046771   0.0039062   1.1973313
CAAG    155 0.0053305   0.0039062   1.3646055
CAAT    138 0.0047459   0.0039062   1.2149391
CACA    81  0.0027856   0.0039062   0.7131164
CACC    88  0.0030263   0.0039062   0.7747438
CACG    72  0.0024761   0.0039062   0.6338813
CACT    62  0.0021322   0.0039062   0.5458422
CAGA    52  0.0017883   0.0039062   0.4578032
CAGC    152 0.0052273   0.0039062   1.3381938
CAGG    55  0.0018915   0.0039062   0.4842149
CAGT    64  0.0022010   0.0039062   0.5634500
CATA    108 0.0037141   0.0039062   0.9508219
CATC    194 0.0066717   0.0039062   1.7079579
CATG    126 0.0043332   0.0039062   1.1092922
CATT    160 0.0055024   0.0039062   1.4086251
CCAA    144 0.0049522   0.0039062   1.2677626
CCAC    71  0.0024417   0.0039062   0.6250774
CCAG    63  0.0021666   0.0039062   0.5546461
CCAT    147 0.0050554   0.0039062   1.2941743
CCCA    77  0.0026481   0.0039062   0.6779008
CCCC    94  0.0032327   0.0039062   0.8275672
CCCG    81  0.0027856   0.0039062   0.7131164
CCCT    84  0.0028888   0.0039062   0.7395282
CCGA    110 0.0037829   0.0039062   0.9684297
CCGC    167 0.0057432   0.0039062   1.4702524
CCGG    110 0.0037829   0.0039062   0.9684297
CCGT    102 0.0035078   0.0039062   0.8979985
CCTA    49  0.0016851   0.0039062   0.4313914
CCTC    90  0.0030951   0.0039062   0.7923516
CCTG    55  0.0018915   0.0039062   0.4842149
CCTT    142 0.0048834   0.0039062   1.2501548
CGAA    162 0.0055712   0.0039062   1.4262329
CGAC    101 0.0034734   0.0039062   0.8891946
CGAG    96  0.0033015   0.0039062   0.8451750
CGAT    204 0.0070156   0.0039062   1.7959970
CGCA    94  0.0032327   0.0039062   0.8275672
CGCC    183 0.0062934   0.0039062   1.6111149
CGCG    120 0.0041268   0.0039062   1.0564688
CGCT    170 0.0058463   0.0039062   1.4966641
CGGA    116 0.0039893   0.0039062   1.0212532
CGGC    171 0.0058807   0.0039062   1.5054681
CGGG    81  0.0027856   0.0039062   0.7131164
CGGT    122 0.0041956   0.0039062   1.0740766
CGTA    61  0.0020978   0.0039062   0.5370383
CGTC    110 0.0037829   0.0039062   0.9684297
CGTG    72  0.0024761   0.0039062   0.6338813
CGTT    168 0.0057776   0.0039062   1.4790563
CTAA    47  0.0016163   0.0039062   0.4137836
CTAC    46  0.0015820   0.0039062   0.4049797
CTAG    20  0.0006878   0.0039062   0.1760781
CTAT    76  0.0026137   0.0039062   0.6690969
CTCA    70  0.0024073   0.0039062   0.6162735
CTCC    109 0.0037485   0.0039062   0.9596258
CTCG    96  0.0033015   0.0039062   0.8451750
CTCT    61  0.0020978   0.0039062   0.5370383
CTGA    71  0.0024417   0.0039062   0.6250774
CTGC    97  0.0033359   0.0039062   0.8539790
CTGG    63  0.0021666   0.0039062   0.5546461
CTGT    92  0.0031639   0.0039062   0.8099594
CTTA    69  0.0023729   0.0039062   0.6074696
CTTC    169 0.0058120   0.0039062   1.4878602
CTTG    155 0.0053305   0.0039062   1.3646055
CTTT    170 0.0058463   0.0039062   1.4966641
GAAA    247 0.0084944   0.0039062   2.1745650
GAAC    126 0.0043332   0.0039062   1.1092922
GAAG    169 0.0058120   0.0039062   1.4878602
GAAT    153 0.0052617   0.0039062   1.3469977
GACA    110 0.0037829   0.0039062   0.9684297
GACC    60  0.0020634   0.0039062   0.5282344
GACG    110 0.0037829   0.0039062   0.9684297
GACT    46  0.0015820   0.0039062   0.4049797
GAGA    93  0.0031983   0.0039062   0.8187633
GAGC    107 0.0036798   0.0039062   0.9420180
GAGG    90  0.0030951   0.0039062   0.7923516
GAGT    46  0.0015820   0.0039062   0.4049797
GATA    80  0.0027512   0.0039062   0.7043125
GATC    112 0.0038517   0.0039062   0.9860376
GATG    194 0.0066717   0.0039062   1.7079579
GATT    153 0.0052617   0.0039062   1.3469977
GCAA    172 0.0059151   0.0039062   1.5142720
GCAC    73  0.0025105   0.0039062   0.6426852
GCAG    97  0.0033359   0.0039062   0.8539790
GCAT    122 0.0041956   0.0039062   1.0740766
GCCA    146 0.0050210   0.0039062   1.2853704
GCCC    81  0.0027856   0.0039062   0.7131164
GCCG    171 0.0058807   0.0039062   1.5054681
GCCT    91  0.0031295   0.0039062   0.8011555
GCGA    151 0.0051929   0.0039062   1.3293899
GCGC    160 0.0055024   0.0039062   1.4086251
GCGG    167 0.0057432   0.0039062   1.4702524
GCGT    89  0.0030607   0.0039062   0.7835477
GCTA    57  0.0019602   0.0039062   0.5018227
GCTC    107 0.0036798   0.0039062   0.9420180
GCTG    152 0.0052273   0.0039062   1.3381938
GCTT    188 0.0064654   0.0039062   1.6551345
GGAA    188 0.0064654   0.0039062   1.6551345
GGAC    66  0.0022698   0.0039062   0.5810578
GGAG    109 0.0037485   0.0039062   0.9596258
GGAT    117 0.0040237   0.0039062   1.0300571
GGCA    133 0.0045739   0.0039062   1.1709196
GGCC    70  0.0024073   0.0039062   0.6162735
GGCG    183 0.0062934   0.0039062   1.6111149
GGCT    103 0.0035422   0.0039062   0.9068024
GGGA    115 0.0039549   0.0039062   1.0124493
GGGC    81  0.0027856   0.0039062   0.7131164
GGGG    94  0.0032327   0.0039062   0.8275672
GGGT    46  0.0015820   0.0039062   0.4049797
GGTA    46  0.0015820   0.0039062   0.4049797
GGTC    60  0.0020634   0.0039062   0.5282344
GGTG    88  0.0030263   0.0039062   0.7747438
GGTT    87  0.0029920   0.0039062   0.7659399
GTAA    70  0.0024073   0.0039062   0.6162735
GTAC    52  0.0017883   0.0039062   0.4578032
GTAG    46  0.0015820   0.0039062   0.4049797
GTAT    86  0.0029576   0.0039062   0.7571360
GTCA    103 0.0035422   0.0039062   0.9068024
GTCC    66  0.0022698   0.0039062   0.5810578
GTCG    101 0.0034734   0.0039062   0.8891946
GTCT    56  0.0019259   0.0039062   0.4930188
GTGA    87  0.0029920   0.0039062   0.7659399
GTGC    73  0.0025105   0.0039062   0.6426852
GTGG    71  0.0024417   0.0039062   0.6250774
GTGT    72  0.0024761   0.0039062   0.6338813
GTTA    51  0.0017539   0.0039062   0.4489992
GTTC    126 0.0043332   0.0039062   1.1092922
GTTG    136 0.0046771   0.0039062   1.1973313
GTTT    202 0.0069468   0.0039062   1.7783892
TAAA    118 0.0040581   0.0039062   1.0388610
TAAC    51  0.0017539   0.0039062   0.4489992
TAAG    69  0.0023729   0.0039062   0.6074696
TAAT    85  0.0029232   0.0039062   0.7483321
TACA    100 0.0034390   0.0039062   0.8803907
TACC    46  0.0015820   0.0039062   0.4049797
TACG    61  0.0020978   0.0039062   0.5370383
TACT    47  0.0016163   0.0039062   0.4137836
TAGA    43  0.0014788   0.0039062   0.3785680
TAGC    57  0.0019602   0.0039062   0.5018227
TAGG    49  0.0016851   0.0039062   0.4313914
TAGT    40  0.0013756   0.0039062   0.3521563
TATA    114 0.0039205   0.0039062   1.0036454
TATC    80  0.0027512   0.0039062   0.7043125
TATG    108 0.0037141   0.0039062   0.9508219
TATT    150 0.0051585   0.0039062   1.3205860
TCAA    164 0.0056400   0.0039062   1.4438407
TCAC    87  0.0029920   0.0039062   0.7659399
TCAG    71  0.0024417   0.0039062   0.6250774
TCAT    197 0.0067749   0.0039062   1.7343696
TCCA    131 0.0045051   0.0039062   1.1533118
TCCC    115 0.0039549   0.0039062   1.0124493
TCCG    116 0.0039893   0.0039062   1.0212532
TCCT    118 0.0040581   0.0039062   1.0388610
TCGA    164 0.0056400   0.0039062   1.4438407
TCGC    151 0.0051929   0.0039062   1.3293899
TCGG    110 0.0037829   0.0039062   0.9684297
TCGT    138 0.0047459   0.0039062   1.2149391
TCTA    43  0.0014788   0.0039062   0.3785680
TCTC    93  0.0031983   0.0039062   0.8187633
TCTG    52  0.0017883   0.0039062   0.4578032
TCTT    146 0.0050210   0.0039062   1.2853704
TGAA    205 0.0070500   0.0039062   1.8048009
TGAC    103 0.0035422   0.0039062   0.9068024
TGAG    70  0.0024073   0.0039062   0.6162735
TGAT    141 0.0048490   0.0039062   1.2413508
TGCA    92  0.0031639   0.0039062   0.8099594
TGCC    133 0.0045739   0.0039062   1.1709196
TGCG    94  0.0032327   0.0039062   0.8275672
TGCT    145 0.0049866   0.0039062   1.2765665
TGGA    131 0.0045051   0.0039062   1.1533118
TGGC    146 0.0050210   0.0039062   1.2853704
TGGG    77  0.0026481   0.0039062   0.6779008
TGGT    71  0.0024417   0.0039062   0.6250774
TGTA    100 0.0034390   0.0039062   0.8803907
TGTC    110 0.0037829   0.0039062   0.9684297
TGTG    81  0.0027856   0.0039062   0.7131164
TGTT    178 0.0061215   0.0039062   1.5670954
TTAA    86  0.0029576   0.0039062   0.7571360
TTAC    70  0.0024073   0.0039062   0.6162735
TTAG    47  0.0016163   0.0039062   0.4137836
TTAT    120 0.0041268   0.0039062   1.0564688
TTCA    205 0.0070500   0.0039062   1.8048009
TTCC    188 0.0064654   0.0039062   1.6551345
TTCG    162 0.0055712   0.0039062   1.4262329
TTCT    140 0.0048146   0.0039062   1.2325469
TTGA    164 0.0056400   0.0039062   1.4438407
TTGC    172 0.0059151   0.0039062   1.5142720
TTGG    144 0.0049522   0.0039062   1.2677626
TTGT    183 0.0062934   0.0039062   1.6111149
TTTA    118 0.0040581   0.0039062   1.0388610
TTTC    247 0.0084944   0.0039062   2.1745650
TTTG    234 0.0080473   0.0039062   2.0601142
TTTT    364 0.0125181   0.0039062   3.2046221

Other   0   0.0000000   0.0000000   10000000000.0000000

我想从第一个块(从仅包含 # 的行开始,到包含 "Other" 的行结束)中提取第一列和第三列(Word 和 Obs Frequency)并将它们转置。对于之后的每个块,我只想转置 Obs Frequency 这一列,并把结果追加在第一个块的转置结果下面。输出文件应如下所示:

    AAAA    AAAC    AAAG    AAAT    AACA    AACC    AACG    AACT    AAGA    AAGC    AAGG    AAGT    AATA    AATC    AATG    AATT    ACAA    ACAC    ACAG    ACAT    ACCA    ACCC    ACCG    ACCT    ACGA    ACGC    ACGG    ACGT    ACTA    ACTC    ACTG    ACTT    AGAA    AGAC    AGAG    AGAT    AGCA    AGCC    AGCG    AGCT    AGGA    AGGC    AGGG    AGGT    AGTA    AGTC    AGTG    AGTT    ATAA    ATAC    ATAG    ATAT    ATCA    ATCC    ATCG    ATCT    ATGA    ATGC    ATGG    ATGT    ATTA    ATTC    ATTG    ATTT    CAAA    CAAC    CAAG    CAAT    CACA    CACC    CACG    CACT    CAGA    CAGC    CAGG    CAGT    CATA    CATC    CATG    CATT    CCAA    CCAC    CCAG    CCAT    CCCA    CCCC    CCCG    CCCT    CCGA    CCGC    CCGG    CCGT    CCTA    CCTC    CCTG    CCTT    CGAA    CGAC    CGAG    CGAT    CGCA    CGCC    CGCG    CGCT    CGGA    CGGC    CGGG    CGGT    CGTA    CGTC    CGTG    CGTT    CTAA    CTAC    CTAG    CTAT    CTCA    CTCC    CTCG    CTCT    CTGA    CTGC    CTGG    CTGT    CTTA    CTTC    CTTG    CTTT    GAAA    GAAC    GAAG    GAAT    GACA    GACC    GACG    GACT    GAGA    GAGC    GAGG    GAGT    GATA    GATC    GATG    GATT    GCAA    GCAC    GCAG    GCAT    GCCA    GCCC    GCCG    GCCT    GCGA    GCGC    GCGG    GCGT    GCTA    GCTC    GCTG    GCTT    GGAA    GGAC    GGAG    GGAT    GGCA    GGCC    GGCG    GGCT    GGGA    GGGC    GGGG    GGGT    GGTA    GGTC    GGTG    GGTT    GTAA    GTAC    GTAG    GTAT    GTCA    GTCC    GTCG    GTCT    GTGA    GTGC    GTGG    GTGT    GTTA    GTTC    GTTG    GTTT    TAAA    TAAC    TAAG    TAAT    TACA    TACC    TACG    TACT    TAGA    TAGC    TAGG    TAGT    TATA    TATC    TATG    TATT    TCAA    TCAC    TCAG    TCAT    TCCA    TCCC    TCCG    TCCT    TCGA    TCGC    TCGG    TCGT    TCTA    TCTC    TCTG    TCTT    TGAA    TGAC    TGAG    TGAT    TGCA    TGCC    TGCG    TGCT    TGGA    TGGC    TGGG    TGGT    TGTA    TGTC    TGTG    TGTT    TTAA    TTAC    TTAG    TTAT    TTCA    TTCC    TTCG    TTCT    TTGA    
TTGC    TTGG    TTGT    TTTA    TTTC    TTTG    TTTT
s21_contig00001 0.0125181   0.0069468   0.0058463   0.0078066   0.0061215   0.0029920   0.0057776   0.0028200   0.0050210   0.0064654   0.0048834   0.0029920   0.0051585   0.0052617   0.0055024   0.0048146   0.0062934   0.0024761   0.0031639   0.0041956   0.0024417   0.0015820   0.0041956   0.0014444   0.0047459   0.0030607   0.0035078   0.0028200   0.0013756   0.0015820   0.0022010   0.0029920   0.0048146   0.0019259   0.0020978   0.0026481   0.0049866   0.0035422   0.0058463   0.0029576   0.0040581   0.0031295   0.0028888   0.0014444   0.0016163   0.0015820   0.0021322   0.0028200   0.0041268   0.0029576   0.0026137   0.0058463   0.0048490   0.0040237   0.0070156   0.0026481   0.0067749   0.0041956   0.0050554   0.0041956   0.0029232   0.0052617   0.0047459   0.0078066   0.0080473   0.0046771   0.0053305   0.0047459   0.0027856   0.0030263   0.0024761   0.0021322   0.0017883   0.0052273   0.0018915   0.0022010   0.0037141   0.0066717   0.0043332   0.0055024   0.0049522   0.0024417   0.0021666   0.0050554   0.0026481   0.0032327   0.0027856   0.0028888   0.0037829   0.0057432   0.0037829   0.0035078   0.0016851   0.0030951   0.0018915   0.0048834   0.0055712   0.0034734   0.0033015   0.0070156   0.0032327   0.0062934   0.0041268   0.0058463   0.0039893   0.0058807   0.0027856   0.0041956   0.0020978   0.0037829   0.0024761   0.0057776   0.0016163   0.0015820   0.0006878   0.0026137   0.0024073   0.0037485   0.0033015   0.0020978   0.0024417   0.0033359   0.0021666   0.0031639   0.0023729   0.0058120   0.0053305   0.0058463   0.0084944   0.0043332   0.0058120   0.0052617   0.0037829   0.0020634   0.0037829   0.0015820   0.0031983   0.0036798   0.0030951   0.0015820   0.0027512   0.0038517   0.0066717   0.0052617   0.0059151   0.0025105   0.0033359   0.0041956   0.0050210   0.0027856   0.0058807   0.0031295   0.0051929   0.0055024   0.0057432   0.0030607   0.0019602   0.0036798   0.0052273   0.0064654   0.0064654   0.0022698   0.0037485   0.0040237   0.0045739   
0.0024073   0.0062934   0.0035422   0.0039549   0.0027856   0.0032327   0.0015820   0.0015820   0.0020634   0.0030263   0.0029920   0.0024073   0.0017883   0.0015820   0.0029576   0.0035422   0.0022698   0.0034734   0.0019259   0.0029920   0.0025105   0.0024417   0.0024761   0.0017539   0.0043332   0.0046771   0.0069468   0.0040581   0.0017539   0.0023729   0.0029232   0.0034390   0.0015820   0.0020978   0.0016163   0.0014788   0.0019602   0.0016851   0.0013756   0.0039205   0.0027512   0.0037141   0.0051585   0.0056400   0.0029920   0.0024417   0.0067749   0.0045051   0.0039549   0.0039893   0.0040581   0.0056400   0.0051929   0.0037829   0.0047459   0.0014788   0.0031983   0.0017883   0.0050210   0.0070500   0.0035422   0.0024073   0.0048490   0.0031639   0.0045739   0.0032327   0.0049866   0.0045051   0.0050210   0.0026481   0.0024417   0.0034390   0.0037829   0.0027856   0.0061215   0.0029576   0.0024073   0.0016163   0.0041268   0.0070500   0.0064654   0.0055712   0.0048146   0.0056400   0.0059151   0.0049522   0.0062934   0.0040581   0.0084944   0.0080473   0.0125181
s21_contig00002 0.0125181   0.0069468   0.0058463   0.0078066   0.0061215   0.0029920   0.0057776   0.0028200   0.0050210   0.0064654   0.0048834   0.0029920   0.0051585   0.0052617   0.0055024   0.0048146   0.0062934   0.0024761   0.0031639   0.0041956   0.0024417   0.0015820   0.0041956   0.0014444   0.0047459   0.0030607   0.0035078   0.0028200   0.0013756   0.0015820   0.0022010   0.0029920   0.0048146   0.0019259   0.0020978   0.0026481   0.0049866   0.0035422   0.0058463   0.0029576   0.0040581   0.0031295   0.0028888   0.0014444   0.0016163   0.0015820   0.0021322   0.0028200   0.0041268   0.0029576   0.0026137   0.0058463   0.0048490   0.0040237   0.0070156   0.0026481   0.0067749   0.0041956   0.0050554   0.0041956   0.0029232   0.0052617   0.0047459   0.0078066   0.0080473   0.0046771   0.0053305   0.0047459   0.0027856   0.0030263   0.0024761   0.0021322   0.0017883   0.0052273   0.0018915   0.0022010   0.0037141   0.0066717   0.0043332   0.0055024   0.0049522   0.0024417   0.0021666   0.0050554   0.0026481   0.0032327   0.0027856   0.0028888   0.0037829   0.0057432   0.0037829   0.0035078   0.0016851   0.0030951   0.0018915   0.0048834   0.0055712   0.0034734   0.0033015   0.0070156   0.0032327   0.0062934   0.0041268   0.0058463   0.0039893   0.0058807   0.0027856   0.0041956   0.0020978   0.0037829   0.0024761   0.0057776   0.0016163   0.0015820   0.0006878   0.0026137   0.0024073   0.0037485   0.0033015   0.0020978   0.0024417   0.0033359   0.0021666   0.0031639   0.0023729   0.0058120   0.0053305   0.0058463   0.0084944   0.0043332   0.0058120   0.0052617   0.0037829   0.0020634   0.0037829   0.0015820   0.0031983   0.0036798   0.0030951   0.0015820   0.0027512   0.0038517   0.0066717   0.0052617   0.0059151   0.0025105   0.0033359   0.0041956   0.0050210   0.0027856   0.0058807   0.0031295   0.0051929   0.0055024   0.0057432   0.0030607   0.0019602   0.0036798   0.0052273   0.0064654   0.0064654   0.0022698   0.0037485   0.0040237   0.0045739   
0.0024073   0.0062934   0.0035422   0.0039549   0.0027856   0.0032327   0.0015820   0.0015820   0.0020634   0.0030263   0.0029920   0.0024073   0.0017883   0.0015820   0.0029576   0.0035422   0.0022698   0.0034734   0.0019259   0.0029920   0.0025105   0.0024417   0.0024761   0.0017539   0.0043332   0.0046771   0.0069468   0.0040581   0.0017539   0.0023729   0.0029232   0.0034390   0.0015820   0.0020978   0.0016163   0.0014788   0.0019602   0.0016851   0.0013756   0.0039205   0.0027512   0.0037141   0.0051585   0.0056400   0.0029920   0.0024417   0.0067749   0.0045051   0.0039549   0.0039893   0.0040581   0.0056400   0.0051929   0.0037829   0.0047459   0.0014788   0.0031983   0.0017883   0.0050210   0.0070500   0.0035422   0.0024073   0.0048490   0.0031639   0.0045739   0.0032327   0.0049866   0.0045051   0.0050210   0.0026481   0.0024417   0.0034390   0.0037829   0.0027856   0.0061215   0.0029576   0.0024073   0.0016163   0.0041268   0.0070500   0.0064654   0.0055712   0.0048146   0.0056400   0.0059151   0.0049522   0.0062934   0.0040581   0.0084944   0.0080473   0.0125181

重要的是,每个块的标识符(带有模式"s21_contig",位于"The input sequences are:"声明之后)应放在第一列,取代"Obs Frequency"这一列标题。

3 个答案:

答案 0 :(得分:2)

这个awk脚本应该做你需要的:

script.awk的内容:

# script.awk -- transpose the "Obs Frequency" column (field 3) of a file of
# concatenated compseq reports: one header row listing every word, followed
# by one row per input sequence.
#
# Globals:
#   sequence[1..seqcnt]  sequence names, in order of appearance
#   words[1..wordcnt]    distinct words, in order of first appearance
#   map[name, word]      observed frequency of `word` in sequence `name`

# A "#   s21_contigNNNNN" comment line names the sequence whose rows follow.
# [ \t] accepts tab- as well as space-separated reports.
/^#[ \t]+s21_contig/ { sequence[++seqcnt] = $2 }

# Word rows, e.g. "AAAA 573 0.0115837 ...". The test is made on $1 rather
# than on the raw line so the field separator may be spaces or tabs, and the
# frequency is stored only for real word rows instead of for every line.
NF >= 3 && $1 ~ /^[ACGT]+$/ {
    if (!seen[$1]++) words[++wordcnt] = $1
    if (seqcnt) map[sequence[seqcnt], $1] = $3
}

END {
    # Header row: every word is preceded by a tab, which leaves the first
    # column (the sequence name) empty.
    for (word = 1; word <= wordcnt; word++) {
        printf "\t%s", words[word]
    }
    print ""

    # One row per sequence; a word missing from a sequence yields an empty
    # field. Values are joined with OFS and the row is ended with ORS (the
    # output separators), not with the input separators FS/RS.
    for (seqnum = 1; seqnum <= seqcnt; seqnum++) {
        printf "%s ", sequence[seqnum]
        for (word = 1; word <= wordcnt; word++) {
            printf "%s%s", map[sequence[seqnum], words[word]], (word == wordcnt ? ORS : OFS)
        }
        if (wordcnt == 0) print ""    # still terminate the line when there are no words
    }
}

像以下一样运行:

awk -f script.awk file

输出:

    AAAA    AAAC    AAAG    AAAT    AACA    AACC    AACG    AACT    AAGA    AAGC    AAGG    AAGT    AATA    AATC    AATG    AATT    ACAA    ACAC    ACAG    ACAT    ACCA    ACCC    ACCG    ACCT    ACGA    ACGC    ACGG    ACGT    ACTA    ACTC    ACTG    ACTT    AGAA    AGAC    AGAG    AGAT    AGCA    AGCC    AGCG    AGCT    AGGA    AGGC    AGGG    AGGT    AGTA    AGTC    AGTG    AGTT    ATAA    ATAC    ATAG    ATAT    ATCA    ATCC    ATCG    ATCT    ATGA    ATGC    ATGG    ATGT    ATTA    ATTC    ATTG    ATTT    CAAA    CAAC    CAAG    CAAT    CACA    CACC    CACG    CACT    CAGA    CAGC    CAGG    CAGT    CATA    CATC    CATG    CATT    CCAA    CCAC    CCAG    CCAT    CCCA    CCCC    CCCG    CCCT    CCGA    CCGC    CCGG    CCGT    CCTA    CCTC    CCTG    CCTT    CGAA    CGAC    CGAG    CGAT    CGCA    CGCC    CGCG    CGCT    CGGA    CGGC    CGGG    CGGT    CGTA    CGTC    CGTG    CGTT    CTAA    CTAC    CTAG    CTAT    CTCA    CTCC    CTCG    CTCT    CTGA    CTGC    CTGG    CTGT    CTTA    CTTC    CTTG    CTTT    GAAA    GAAC    GAAG    GAAT    GACA    GACC    GACG    GACT    GAGA    GAGC    GAGG    GAGT    GATA    GATC    GATG    GATT    GCAA    GCAC    GCAG    GCAT    GCCA    GCCC    GCCG    GCCT    GCGA    GCGC    GCGG    GCGT    GCTA    GCTC    GCTG    GCTT    GGAA    GGAC    GGAG    GGAT    GGCA    GGCC    GGCG    GGCT    GGGA    GGGC    GGGG    GGGT    GGTA    GGTC    GGTG    GGTT    GTAA    GTAC    GTAG    GTAT    GTCA    GTCC    GTCG    GTCT    GTGA    GTGC    GTGG    GTGT    GTTA    GTTC    GTTG    GTTT    TAAA    TAAC    TAAG    TAAT    TACA    TACC    TACG    TACT    TAGA    TAGC    TAGG    TAGT    TATA    TATC    TATG    TATT    TCAA    TCAC    TCAG    TCAT    TCCA    TCCC    TCCG    TCCT    TCGA    TCGC    TCGG    TCGT    TCTA    TCTC    TCTG    TCTT    TGAA    TGAC    TGAG    TGAT    TGCA    TGCC    TGCG    TGCT    TGGA    TGGC    TGGG    TGGT    TGTA    TGTC    TGTG    TGTT    TTAA    TTAC    TTAG    TTAT    TTCA    TTCC    TTCG    TTCT    TTGA    
TTGC    TTGG    TTGT    TTTA    TTTC    TTTG    TTTT
s21_contig00001 0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002 0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181

答案 1 :(得分:2)

这似乎也有效(将此代码保存为transpose.awk):

# transpose.awk -- print the "Obs Frequency" column (field 3) of each compseq
# report block as one row, preceded by a single header row of words.
#
# Globals:
#   source          name of the sequence currently being collected
#   seq[1..nkeys]   distinct words, in order of first appearance
#   key[word]       set once `word` has been recorded in seq[]
#   obs[word]       observed frequency of `word` in the current sequence
#   printed_header  non-zero once the header row has been written

# A "#   s21_contigNNNNN" line starts a new block: flush the previous block
# (if any) and remember the new name. [ \t] accepts tabs as well as spaces.
/^#[ \t]+s21_contig[0-9]+/ {
        if (source) print_results()
        source = $2
}

# Word rows, e.g. "AAAA 573 0.0115837 ...". Testing $1 instead of the raw
# line makes the match independent of the separator (spaces or tabs).
$1 ~ /^[ACGT]+$/ {
        if (!($1 in key))
        {
                key[$1] = 1
                seq[++nkeys] = $1
        }
        obs[$1] = $3
}

# Flush the last block. Nothing is printed for input that contained neither
# a sequence name nor any word row.
END { if (source != "" || nkeys > 0) print_results() }

# Print the header row (first call only) and then the row for `source`.
# `i` and `pad` are locals (extra parameters, per awk convention).
function print_results(     i, pad)
{
        if (printed_header == 0)
        {
                # 17 blanks line the first word up with the first value of
                # a row that starts with a 15-character sequence name.
                pad = sprintf("%17s", "")
                for (i = 1; i <= nkeys; i++)
                {
                        printf "%s%s", pad, seq[i]
                        # 6 blanks + 4-letter word = width of one " %-9s" cell
                        pad = sprintf("%6s", "")
                }
                printf "\n"
                printed_header++
        }
        printf "%s ", source
        for (i = 1; i <= nkeys; i++)
                printf " %-9s", obs[seq[i]]
        printf "\n"
        # Forget this block's values so a word missing from the next block
        # shows up as a blank cell rather than a stale number.
        delete obs
}

将脚本运行为:

awk -f transpose.awk data

对给定数据运行的输出:

                 AAAA      AAAC      AAAG      AAAT      AACA      AACC      AACG      AACT      AAGA      AAGC      AAGG      AAGT      AATA      AATC      AATG      AATT      ACAA      ACAC      ACAG      ACAT      ACCA      ACCC      ACCG      ACCT      ACGA      ACGC      ACGG      ACGT      ACTA      ACTC      ACTG      ACTT      AGAA      AGAC      AGAG      AGAT      AGCA      AGCC      AGCG      AGCT      AGGA      AGGC      AGGG      AGGT      AGTA      AGTC      AGTG      AGTT      ATAA      ATAC      ATAG      ATAT      ATCA      ATCC      ATCG      ATCT      ATGA      ATGC      ATGG      ATGT      ATTA      ATTC      ATTG      ATTT      CAAA      CAAC      CAAG      CAAT      CACA      CACC      CACG      CACT      CAGA      CAGC      CAGG      CAGT      CATA      CATC      CATG      CATT      CCAA      CCAC      CCAG      CCAT      CCCA      CCCC      CCCG      CCCT      CCGA      CCGC      CCGG      CCGT      CCTA      CCTC      CCTG      CCTT      CGAA      CGAC      CGAG      CGAT      CGCA      CGCC      CGCG      CGCT      CGGA      CGGC      CGGG      CGGT      CGTA      CGTC      CGTG      CGTT      CTAA      CTAC      CTAG      CTAT      CTCA      CTCC      CTCG      CTCT      CTGA      CTGC      CTGG      CTGT      CTTA      CTTC      CTTG      CTTT      GAAA      GAAC      GAAG      GAAT      GACA      GACC      GACG      GACT      GAGA      GAGC      GAGG      GAGT      GATA      GATC      GATG      GATT      GCAA      GCAC      GCAG      GCAT      GCCA      GCCC      GCCG      GCCT      GCGA      GCGC      GCGG      GCGT      GCTA      GCTC      GCTG      GCTT      GGAA      GGAC      GGAG      GGAT      GGCA      GGCC      GGCG      GGCT      GGGA      GGGC      GGGG      GGGT      GGTA      GGTC      GGTG      GGTT      GTAA      GTAC      GTAG      GTAT      GTCA      GTCC      GTCG      GTCT      GTGA      GTGC      GTGG      GTGT      GTTA      GTTC      GTTG      GTTT      TAAA      TAAC      TAAG      TAAT      TACA      TACC      
TACG      TACT      TAGA      TAGC      TAGG      TAGT      TATA      TATC      TATG      TATT      TCAA      TCAC      TCAG      TCAT      TCCA      TCCC      TCCG      TCCT      TCGA      TCGC      TCGG      TCGT      TCTA      TCTC      TCTG      TCTT      TGAA      TGAC      TGAG      TGAT      TGCA      TGCC      TGCG      TGCT      TGGA      TGGC      TGGG      TGGT      TGTA      TGTC      TGTG      TGTT      TTAA      TTAC      TTAG      TTAT      TTCA      TTCC      TTCG      TTCT      TTGA      TTGC      TTGG      TTGT      TTTA      TTTC      TTTG      TTTT
s21_contig00001  0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002  0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181

代码按照遇到的先后顺序打印 [ACGT] 单词序列,并在出现新单词时追加到列表末尾。如果某个数据块缺少某个单词的值,则该值在输出中显示为空白字段。标题行对应于第一个数据块结束时已收集的 [ACGT] 单词列表;代码不会再次打印标题。

答案 2 :(得分:1)

只遍历一次文件就完成这项任务颇具挑战,而这个脚本做到了。输出格式设置为与提供的样本一致,但可以轻松调整。脚本中的注释解释了各步骤的操作。

#!/bin/bash
#
# Transpose the "Obs Frequency" column of a file of concatenated compseq
# reports: print one header row of words, then one row per input sequence.
#
# Usage: dna.sh FILE [KEY]
#   FILE  file holding one or more compseq report blocks
#   KEY   prefix identifying a sequence-name line (default "s21")
#
# Exit status: 0 on success, 1 when FILE is missing or not a regular file.

[ -f "$1" ] || {
    # Report the file that was asked for (not the script name), on stderr.
    printf "\n  Error: insufficient input, file '%s' not found.\n\n" "$1" >&2
    exit 1
}

## this script requires the header row to be equal for each sequence

key="${2:-s21}"         # prefix to identify sequence, any length, default "s21"
inputseq=""             # name of the sequence currently being collected

declare -i hdrdone=0    # 1 once the header row has been printed completely
declare -i seqcnt=0     # number of sequence-name lines seen so far
declare -a obsfarray=() # Obs Frequency values of the current sequence

## print a newline, the current sequence name and its collected values
print_row() {
    printf "\n%s" "$inputseq"
    # printf with no arguments would still emit the format once, so skip it
    # when nothing was collected
    if [ "${#obsfarray[@]}" -gt 0 ]; then
        printf "  %s" "${obsfarray[@]}"     # values are data, never the format
    fi
}

## make single pass through data file
# fields: word, Obs Count, Obs Frequency; the remaining columns are unused.
# '|| [ -n "$word" ]' keeps a last line that lacks a trailing newline.
while read -r word obscnt obsfreq _ || [ -n "$word" ]; do

    ## a sequence-name line ("#   s21_contig00001") has the name in field 2
    if [[ "$obscnt" == "$key"* ]]; then
        # a previous sequence exists: the header is complete, flush its row
        if (( seqcnt > 0 )); then
            hdrdone=1
            print_row
            obsfarray=()                    # start collecting the next sequence
        fi
        inputseq="$obscnt"
        seqcnt=$(( seqcnt + 1 ))
    fi

    ## data rows: field 1 consists only of A, C, G, T (this excludes 'Total')
    if [[ "$word" =~ ^[ACGT]+$ ]]; then
        # words seen before the second sequence starts make up the header
        (( hdrdone )) || printf "    %s" "$word"
        obsfarray+=( "$obsfreq" )
    fi

done <"$1"

# print final sequence and Obs Frequency array, if anything was collected
if (( seqcnt > 0 || ${#obsfarray[@]} > 0 )); then
    print_row
fi
printf "\n"             # terminate the last output line

exit 0

输出(每行仅显示前 5 个值,其余以 <snip> 省略):

$ ./dna.sh dat/dna.dat
    AAAA    AAAC    AAAG    AAAT    AACA <snip>
s21_contig00001  0.0115837  0.0060850  0.0061659  0.0069745  0.0045890 <snip>
s21_contig00002  0.0125181  0.0069468  0.0058463  0.0078066  0.0061215 <snip>