从多个文本文件的矩阵中的第n行中提取第m个条目,并将值写入新的文本文件

时间:2016-02-21 14:43:44

标签: awk sed grep data-extraction

我有很多文本文件,每个文件包含一个矩阵,条目之间由制表符和空格分隔(下面是一个文件中31x31矩阵的示例)。文件中的行长度限制为80个字符,因此每个文件中的矩阵被拆分为多个部分,每个部分有5列。各文件的行标签和列标签相同,但每个文件中的值都不同。

我使用awk、tac和sed从更大的文本文件中提取每个矩阵,我可以使用grep提取以"n"开头的行,但是会得到多个匹配。

如何从矩阵中提取单个值,例如column-m、row-n处的值?

示例矩阵(如果我想要column-label-4、row-label-19处的值,它是0.170506e+03,但我不想要column-label-9、row-label-19等处的值)。

            1             2             3             4             5 
  1  0.000000e+00
  2  0.721521e+02  0.000000e+00
  3  0.600476e+02  0.128509e+01  0.000000e+00
  4  0.456807e+01  0.680774e+02  0.684277e+01  0.000000e+00
  5 -0.182269e+01  0.931860e+01  0.559086e+02 -0.242923e+01  0.000000e+00
  6  0.584994e+01 -0.197137e+00 -0.143585e+01  0.560372e+02  0.550432e+02
  7  0.542294e+02  0.262080e+01  0.157417e+01  0.286038e+01  0.354815e+01
  8 -0.910697e+00 -0.171964e+01 -0.226769e+01  0.234255e+00  0.353624e+00
  9  0.127740e+02 -0.660645e+00  0.105756e+01  0.138880e+00  0.394360e+00
 10 -0.567264e+01  0.266271e+02 -0.699972e+00 -0.241535e+01 -0.489788e+00
 11  0.164453e+01 -0.268724e+01 -0.937484e+00  0.216974e+01  0.106543e+01
 12 -0.151676e+00  0.354374e+01 -0.209327e-01 -0.195212e+00  0.202971e-01
 13  0.273307e-01 -0.203135e+00 -0.110701e-01  0.861538e-01 -0.110135e-01
 14 -0.573171e-02  0.214964e-01 -0.184033e-01  0.364860e-01  0.606920e-02
 15  0.283181e-03 -0.681593e-01 -0.119171e-01 -0.686958e-02 -0.806671e-02
 16 -0.179097e-01  0.546512e-01 -0.100820e-01 -0.920151e-02 -0.545025e-02
 17 -0.680697e-02  0.330910e-01 -0.882713e-02 -0.702130e-02 -0.904883e-02
 18  0.191358e+01  0.897776e+01  0.174601e+03 -0.160680e+01  0.132934e+01
 19  0.501936e+01 -0.148977e+01 -0.106646e+01  0.170506e+03  0.788318e+01
 20  0.750686e+01 -0.187707e+01  0.372143e+01  0.779846e+01  0.171860e+03
 21 -0.176860e+01  0.970217e+01  0.842497e+01  0.298143e+01  0.224798e+01
 22  0.277835e+01 -0.277276e-01  0.969619e+00 -0.622222e-01  0.282413e+00
 23 -0.313601e-01  0.159815e+00  0.128057e+00 -0.108106e+00 -0.142940e+00
 24 -0.664854e+00 -0.103596e+00 -0.525825e+00 -0.150942e+00 -0.168649e+00
 25  0.238097e+00 -0.203976e+00  0.139855e+00 -0.223460e+00 -0.160727e+00
 26  0.939240e-01  0.520710e+01  0.143495e+00  0.474836e+00 -0.177345e-01
 27 -0.217367e-01 -0.193976e-01 -0.402727e-01 -0.116617e-01 -0.391614e-01
 28  0.215782e-01 -0.257971e-01 -0.524527e-02  0.847063e-02 -0.812533e-02
 29 -0.351691e-01 -0.201438e-01 -0.349724e-01 -0.349667e-01 -0.404346e-01
 30 -0.242743e-01  0.273895e-01 -0.272225e-01 -0.321191e-01 -0.270577e-01
 31 -0.426292e-01 -0.650232e-01 -0.353929e-01 -0.500703e-01 -0.408613e-01
            6             7             8             9            10 
  6  0.000000e+00
  7 -0.936388e+00  0.000000e+00
  8 -0.771894e+00  0.347601e+02  0.000000e+00
  9 -0.780112e-02  0.398653e+02 -0.240450e+01  0.000000e+00
 10 -0.392030e+00  0.810380e+00  0.197957e+00 -0.160651e+01  0.000000e+00
 11 -0.104303e+01 -0.473339e+00 -0.181607e+00 -0.779374e-01  0.226143e+02
 12 -0.418416e-02  0.624987e-02  0.108646e-01  0.296996e-01 -0.666498e+01
 13 -0.164417e-02  0.805055e-02 -0.327675e-02  0.224641e+00 -0.228145e+01
 14 -0.896018e-02  0.467393e-02 -0.116081e-01  0.402774e-01 -0.187870e+01
 15 -0.165742e-01  0.116181e-01  0.491196e-02  0.149563e-02  0.192684e+00
 16 -0.108811e-01  0.914469e-02 -0.545833e-02 -0.108184e-02 -0.210964e+00
 17 -0.781403e-02 -0.580951e-02  0.118073e-02  0.154451e-02 -0.486910e+00
 18  0.897380e+01  0.424211e+01 -0.172328e+00 -0.184136e+00 -0.138291e+01
 19  0.657396e+00  0.147826e+01 -0.523860e-01 -0.111425e+00 -0.128888e+01
 20  0.288321e+01  0.315703e+00 -0.304020e-01  0.395279e+00  0.305902e+00
 21  0.172066e+03  0.353312e+00  0.257543e+00 -0.827528e-01 -0.546139e+00
 22 -0.173817e+00 -0.561225e+01 -0.231104e+01  0.132923e+03 -0.212803e+01
 23  0.678972e-01 -0.325811e+01 -0.333356e+01  0.132717e+03 -0.117255e+01
 24 -0.364090e-01 -0.559051e+01 -0.226053e+01  0.135664e+03 -0.191640e+01
 25  0.288697e-01 -0.154002e-01  0.732688e-01  0.389421e-01 -0.649423e+01
 26  0.329439e-01  0.113114e+00 -0.489852e-01  0.437129e-02 -0.178242e+01
 27 -0.350781e-01  0.331257e-01  0.692445e-01  0.162228e+00 -0.233797e+00
 28 -0.105914e-01  0.449153e-01 -0.421949e-01 -0.847419e-02  0.406464e+00
 29 -0.346255e-01 -0.126815e-01  0.298766e-01 -0.190825e-02 -0.931165e-01
 30 -0.306026e-01 -0.103474e-01 -0.750491e-02 -0.276690e-01 -0.387865e+00
 31 -0.391694e-01 -0.160436e-01  0.129641e-01 -0.278777e-01  0.273425e+00
           11            12            13            14            15 
 11  0.000000e+00
 12  0.467564e+02  0.000000e+00
 13  0.881448e+00  0.579819e+02  0.000000e+00
 14  0.716385e+01  0.607427e+02 -0.991284e+00  0.000000e+00
 15  0.287998e+01 -0.194597e+01  0.565281e+02  0.934983e+01  0.000000e+00
 16  0.463881e+01 -0.181340e+01  0.936084e+01  0.570706e+02 -0.196049e+01
 17 -0.832936e+00  0.976863e+01 -0.230035e+01 -0.238944e+01  0.563671e+02
 18  0.347788e+00 -0.479532e-01 -0.301202e-01 -0.153968e-01 -0.317186e-01
 19 -0.613594e+00 -0.614702e-01 -0.591356e-02 -0.214354e-01 -0.272421e-01
 20 -0.798555e+00 -0.729967e-01 -0.474076e-01 -0.464470e-01 -0.396436e-01
 21  0.344045e+00  0.304264e-01 -0.320892e-01 -0.199989e-01 -0.343410e-01
 22 -0.139634e+00 -0.155831e-02  0.451784e-01  0.122067e-01  0.391645e-01
 23 -0.134584e-01  0.977362e-01  0.117129e+00 -0.112740e-01  0.573879e-01
 24  0.665500e-01  0.205558e+00  0.291200e+00  0.203071e+00  0.185938e+00
 25  0.153013e+03 -0.385235e+01  0.209834e+01  0.268836e+01 -0.176283e+01
 26  0.142234e+03 -0.500715e+01  0.583433e+01  0.533312e+01  0.248117e+00
 27  0.369337e+01  0.145248e+01  0.169852e+03  0.667599e+01  0.211325e+01
 28  0.506080e+01  0.185510e+01  0.654919e+01  0.167717e+03 -0.133186e+01
 29  0.423633e+00  0.746968e+01  0.289356e+01 -0.155984e+01  0.169140e+03
 30  0.836116e+00  0.788074e+01 -0.154829e+01  0.321592e+01  0.768037e+01
 31  0.492733e+00 -0.167308e+01  0.769284e+01  0.771146e+01  0.274931e+01
           16            17            18            19            20 
 16  0.000000e+00
 17  0.563437e+02  0.000000e+00
 18 -0.277881e-01 -0.310893e-01  0.000000e+00
 19 -0.468903e-01 -0.349754e-01  0.674547e-01  0.000000e+00
 20 -0.356008e-01 -0.399449e-01  0.811357e+01  0.392889e+00  0.000000e+00
 21 -0.286906e-01 -0.390021e-01  0.982516e+00  0.850918e+01  0.770379e+01
 22 -0.203361e-02  0.179088e-01 -0.123824e+00 -0.325991e+00 -0.126917e+00
 23 -0.183104e-01 -0.239046e-01 -0.232345e+00 -0.960356e-01 -0.229829e+00
 24  0.681006e-01  0.727167e-01 -0.256214e+00 -0.111619e+00 -0.344057e+00
 25 -0.136226e+01  0.151444e+01 -0.316609e+00  0.782397e+00 -0.204809e+00
 26 -0.225132e+00  0.345762e+00 -0.151636e+00  0.203399e+00 -0.125574e+00
 27 -0.137205e+01  0.759751e+01 -0.156822e+00  0.861828e-01 -0.164811e+00
 28  0.183143e+01  0.740244e+01 -0.735917e-01 -0.173018e-01 -0.960416e-01
 29  0.774362e+01  0.275865e+01 -0.141085e+00 -0.948004e-01 -0.145096e+00
 30  0.169995e+03  0.256469e+01 -0.110523e+00 -0.130983e+00 -0.125583e+00
 31  0.262752e+01  0.168413e+03 -0.137933e+00 -0.152079e+00 -0.141287e+00
           21            22            23            24            25 
 21  0.000000e+00
 22 -0.233295e+00  0.000000e+00
 23 -0.302229e+00 -0.117659e+02  0.000000e+00
 24 -0.312839e+00 -0.176690e+02 -0.189160e+02  0.000000e+00
 25 -0.824478e-01 -0.283764e+00 -0.471927e-01 -0.276491e+00  0.000000e+00
 26 -0.166512e+00 -0.203046e+00 -0.165616e+00  0.266305e+00 -0.146540e+02
 27 -0.109422e+00  0.721405e-02  0.545424e+00  0.526054e+00 -0.123076e+01
 28 -0.750264e-01 -0.135673e+00 -0.160647e+00  0.100742e+00 -0.118033e+01
 29 -0.135413e+00 -0.296749e-02  0.458369e-01  0.741089e-01  0.247293e+00
 30 -0.124059e+00 -0.112653e+00 -0.192132e+00 -0.549934e-01  0.318556e+00
 31 -0.143265e+00 -0.841207e-01 -0.136539e+00 -0.619821e-01 -0.141302e+01
           26            27            28            29            30 
 26  0.000000e+00
 27 -0.934817e+00  0.000000e+00
 28  0.172890e+00  0.118621e+01  0.000000e+00
 29  0.126856e+00  0.807384e+01  0.223449e+00  0.000000e+00
 30  0.345871e-01  0.259443e+00  0.796643e+01  0.609036e+00  0.000000e+00
 31 -0.636999e+00  0.563246e+00  0.501209e+00  0.766542e+01  0.777868e+01
           31 
 31  0.000000e+00

2 个答案:

答案 0 :(得分:3)

awk -v row=19 -v col=4 '
    # Lines made up solely of blanks and digits are column-label headers;
    # any line containing another character is handled here as a data row.
    /[^[:blank:][:digit:]]/ {
        # Emit the entry once the wanted row label shows up after a header
        # that located the wanted column, then stop reading.
        if ($1 == row && pos) {
            print $pos
            exit
        }
        next
    }
    {
        # Header line: when col lies between the first and last label of
        # this section, remember where it is.  Data rows start with a row
        # label, so the matching value sits one field further to the right.
        if (col >= $1 && $NF >= col) {
            k = 0
            while (++k <= NF) {
                if ($k == col) pos = k + 1
            }
        }
    }
' file
0.170506e+03

答案 1 :(得分:1)

看起来制表符既被用作字段分隔符,也被用于排版对齐。如果是这种情况,那么以下方法更为通用,因为矩阵元素可以是任意内容:

awk -F\\t -v row=19 -v col=19 '
    # Header lines are assumed to start with a tab, i.e. to have "" (numeric
    # value 0) in field 1; data lines start with their non-zero row label.
    0+$1 == 0 {
        # Guard against trailing tabs: drop empty fields at the end so that
        # $NF is the last column label of this section.
        while (NF > 1 && 0+$NF == 0) { NF-- }
        # Labels within a section are assumed consecutive and start in
        # field 2, so label c sits in field 2 + c - $2.  Accept only the
        # section whose label range contains col; any other header resets
        # field so that no value is read from the wrong section (and so that
        # field can never become negative).  Blank lines have NF < 2 and
        # leave field untouched.
        if (NF >= 2) {
            field = (0+$2 <= 0+col && 0+col <= 0+$NF) ? 2 + col - $2 : 0
        }
        next
    }
    # Data line: print the entry when the row label matches inside the
    # section that contains col, then stop.
    $1 == row && field { print $field; exit }
' file

这种做法还直接计算col对应的字段位置,而不是逐个搜索,但代价是需要检查尾随制表符。

(您可以考虑修改格式,以便可以可靠地检测标题,例如在第一个字段中输入“0”。)