使用python中的行号读取文件时出错

时间:2017-03-24 03:21:48

标签: python xml numpy readlines linecache

我试图将数据从xml文件读入python, 我的文件看起来像这样,

1.  <Array type="Matrix" nelem="2037">                       
2.  <Matrix nrows="92" ncols="5">
3.   1.0144E+05 296.34    24.34  2.36E-02  9.18E-09
4.   1.0132E+05 296.12    34.62  2.34E-02  9.18E-09
5.   1.0103E+05 295.85    59.50  2.32E-02  9.61E-09
6.   1.0063E+05 295.51    94.08  2.30E-02  1.05E-08
7.   1.0010E+05 295.07   139.65  2.29E-02  1.24E-08
8.   9.9444E+04 294.51   196.23  2.28E-02  1.49E-08
9.   9.8676E+04 293.95   263.01  2.27E-02  1.70E-08
10.  9.7779E+04 293.36   341.50  2.23E-02  1.94E-08
11.  9.6727E+04 292.95   434.30  2.22E-02  2.27E-08
12.  9.5517E+04 292.28   542.09  2.20E-02  2.59E-08
13.  9.4150E+04 291.50   665.24  2.12E-02  2.70E-08
14.  9.2626E+04 290.83   804.34  2.08E-02  2.64E-08
15.  9.0945E+04 290.02   960.00  2.04E-02  2.52E-08
16.  8.9111E+04 289.29  1132.66  2.00E-02  2.51E-08
17.  8.7133E+04 288.48  1322.50  1.96E-02  2.61E-08
18.  8.5016E+04 287.48  1529.73  1.92E-02  2.68E-08
19.  8.2769E+04 286.42  1754.71  1.84E-02  2.77E-08
20.  8.0399E+04 285.76  1997.90  1.74E-02  2.91E-08
21.  7.7920E+04 285.16  2259.60  1.62E-02  3.14E-08
22.  7.5342E+04 284.11  2539.80  1.53E-02  3.40E-08
23.  7.2678E+04 282.85  2838.57  1.37E-02  3.46E-08
24.  6.9937E+04 281.48  3155.99  1.15E-02  3.30E-08
25.  6.7135E+04 279.79  3491.77  9.44E-03  3.06E-08
26.  6.4287E+04 277.74  3845.49  7.14E-03  3.38E-08
27.  6.1403E+04 276.48  4217.67  2.80E-03  5.91E-08
28.  5.8497E+04 274.86  4608.78  1.21E-03  6.84E-08
29.  5.5584E+04 271.76  5017.35  3.52E-03  4.09E-08
30.  5.2686E+04 268.40  5440.57  4.30E-03  2.86E-08
31.  4.9832E+04 265.81  5875.99  2.99E-03  2.45E-08
32.  4.7049E+04 263.28  6320.95  1.35E-03  1.88E-08
33.  4.4363E+04 260.75  6771.68  5.27E-04  1.42E-08
34.  4.1799E+04 257.97  7223.64  6.16E-04  1.30E-08
35.  3.9366E+04 255.02  7673.76  8.03E-04  1.43E-08
36.  3.7061E+04 251.94  8121.31  1.17E-03  1.72E-08
37.  3.4879E+04 248.88  8566.01  1.27E-03  1.74E-08
38.  3.2814E+04 245.60  9007.64  8.27E-04  1.53E-08
39.  3.0860E+04 242.06  9445.71  4.83E-04  1.20E-08
40.  2.9012E+04 238.35  9879.78  3.83E-04  9.68E-09
41.  2.7265E+04 234.55 10309.53  3.30E-04  9.10E-09
42.  2.5614E+04 230.80 10734.89  2.89E-04  8.65E-09
43.  2.4054E+04 227.19 11155.95  2.34E-04  8.05E-09
44.  2.2581E+04 223.68 11572.86  1.75E-04  7.38E-09
45.  2.1191E+04 220.25 11985.73  1.25E-04  7.66E-09
46.  1.9879E+04 217.01 12394.75  8.71E-05  7.49E-09
47.  1.8641E+04 213.99 12800.23  5.93E-05  2.50E-08
48.  1.7473E+04 211.29 13202.67  4.46E-05  5.99E-08
49.  1.6373E+04 208.76 13602.46  2.82E-05  7.64E-08
50.  1.5336E+04 206.22 13999.69  1.41E-05  9.54E-08
51.  1.4360E+04 203.68 14394.33  1.09E-05  1.05E-07
52.  1.3440E+04 201.70 14786.87  8.80E-06  1.34E-07
53.  1.2575E+04 200.15 15178.25  6.54E-06  1.46E-07
54.  1.1760E+04 198.40 15569.14  5.39E-06  1.68E-07
55.  1.0989E+04 196.78 15960.93  4.67E-06  2.74E-07
56.  1.0258E+04 196.25 16356.83  3.72E-06  5.86E-07
57.  9.5617E+03 196.05 16760.42  3.20E-06  1.02E-06
58.  8.8959E+03 194.61 17173.03  3.17E-06  1.25E-06
59.  8.2573E+03 196.84 17599.75  3.24E-06  1.78E-06
60.  7.6429E+03 197.60 18046.00  3.20E-06  2.03E-06
61.  7.0506E+03 197.83 18512.76  3.12E-06  2.02E-06
62.  6.4798E+03 198.96 19002.95  3.11E-06  1.95E-06
63.  5.9315E+03 201.72 19521.31  3.18E-06  1.77E-06
64.  5.4070E+03 205.16 20072.52  3.25E-06  1.36E-06
65.  4.9073E+03 208.03 20658.79  3.26E-06  7.92E-07
66.  4.4331E+03 210.27 21280.86  3.27E-06  3.69E-07
67.  3.9848E+03 211.16 21938.29  3.28E-06  2.51E-07
68.  3.5628E+03 211.81 22631.11  3.30E-06  3.40E-07
69.  3.1674E+03 212.75 23362.02  3.35E-06  9.72E-07
70.  2.7986E+03 214.17 24135.29  3.41E-06  2.31E-06
71.  2.4565E+03 215.78 24955.51  3.49E-06  4.13E-06
72.  2.1410E+03 217.31 25826.71  3.54E-06  5.75E-06
73.  1.8518E+03 218.25 26751.68  3.56E-06  6.85E-06
74.  1.5884E+03 220.18 27735.83  3.57E-06  7.84E-06
75.  1.3504E+03 224.28 28791.83  3.66E-06  8.91E-06
76.  1.1369E+03 228.51 29932.10  3.93E-06  9.87E-06
77.  9.4706E+02 230.12 31158.07  4.26E-06  1.04E-05
78.  7.7988E+02 231.07 32468.76  4.54E-06  1.05E-05
79.  6.3417E+02 233.23 33873.97  4.69E-06  1.03E-05
80.  5.0860E+02 237.07 35392.41  4.76E-06  9.53E-06
81.  4.0176E+02 242.64 37047.76  4.94E-06  8.10E-06
82.  3.1209E+02 250.37 38869.69  5.11E-06  6.37E-06
83.  2.3800E+02 256.95 40881.74  5.43E-06  4.94E-06
84.  1.7781E+02 261.15 43091.95  5.65E-06  3.89E-06
85.  1.2985E+02 263.11 45503.43  5.74E-06  3.16E-06
86.  9.2442E+01 266.27 48135.75  5.84E-06  2.52E-06
87.  6.3957E+01 267.56 51013.24  6.04E-06  2.09E-06
88.  4.2850E+01 264.55 54131.75  6.26E-06  1.81E-06
89.  2.7683E+01 255.02 57453.20  6.40E-06  1.45E-06
90.  1.7161E+01 241.49 60927.40  6.45E-06  1.09E-06
91.  1.0148E+01 226.03 64521.68  6.45E-06  7.12E-07
92.  5.6840E+00 210.65 68225.23  6.13E-06  4.00E-07
93.  2.9904E+00 196.66 72053.01  5.13E-06  2.16E-07
94.  1.0000E+00 183.12 78140.44  4.08E-06  1.51E-07
95. </Matrix>
96. <Matrix nrows="92" ncols="5">
97.  1.0158E+05 294.49     0.17  1.89E-02  2.14E-08
98.  1.0146E+05 294.34    10.39  1.82E-02  2.14E-08
99.  1.0117E+05 294.07    35.13  1.79E-02  3.07E-08
100.     1.0077E+05 293.72    69.49  1.78E-02  3.68E-08
101.     1.0024E+05 293.27   114.79  1.77E-02  3.97E-08
102.     9.9583E+04 292.72   171.05  1.77E-02  4.20E-08
103.     9.8814E+04 292.07   237.44  1.77E-02  4.39E-08
104.     9.7915E+04 291.30   315.44  1.76E-02  4.60E-08
105.     9.6861E+04 290.41   407.55  1.75E-02  4.80E-08
106.     9.5649E+04 289.84   514.48  1.53E-02  4.87E-08
107.     9.4279E+04 290.36   636.95  1.05E-02  4.99E-08
108.     9.2752E+04 290.23   775.71  9.77E-03  5.06E-08
109.     9.1067E+04 289.76   931.25  1.00E-02  5.24E-08
110.     8.9230E+04 289.17  1103.90  1.09E-02  5.59E-08
111.     8.7248E+04 288.44  1293.81  1.22E-02  5.74E-08
112.     8.5127E+04 287.46  1501.17  1.39E-02  5.66E-08
113.     8.2875E+04 286.01  1726.14  1.48E-02  5.49E-08
114.     8.0501E+04 284.37  1968.74  1.29E-02  5.49E-08
115.     7.8017E+04 284.30  2229.59  9.75E-03  5.71E-08
116.     7.5434E+04 283.44  2509.25  7.47E-03  6.05E-08
117.     7.2764E+04 282.38  2807.64  5.96E-03  6.27E-08
118.     7.0018E+04 281.58  3125.10  4.92E-03  6.16E-08
119.     6.7211E+04 280.25  3461.48  5.69E-03  6.17E-08
120.     6.4357E+04 277.96  3815.91  7.07E-03  6.21E-08
121.     6.1468E+04 275.34  4187.77  6.06E-03  6.18E-08
122.     5.8557E+04 273.19  4577.20  3.75E-03  6.04E-08
123.     5.5638E+04 271.04  4984.32  2.22E-03  5.91E-08
124.     5.2735E+04 268.30  5407.26  1.71E-03  5.79E-08
125.     4.9876E+04 264.80  5842.15  1.38E-03  5.71E-08
126.     4.7088E+04 261.36  6285.01  8.12E-04  5.68E-08
127.     4.4398E+04 258.32  6732.39  7.79E-04  5.66E-08
128.     4.1829E+04 255.49  7180.45  1.47E-03  5.16E-08
129.     3.9393E+04 252.93  7626.94  1.10E-03  5.16E-08
130.     3.7084E+04 250.55  8071.79  1.21E-03  5.00E-08
131.     3.4899E+04 247.67  8514.55  1.59E-03  4.51E-08
132.     3.2831E+04 244.13  8954.15  1.25E-03  4.52E-08
133.     3.0875E+04 240.59  9389.94  8.87E-04  4.68E-08
134.     2.9025E+04 237.16  9821.94  6.81E-04  4.55E-08
135.     2.7276E+04 233.85 10250.30  5.10E-04  4.51E-08
136.     2.5623E+04 230.59 10675.13  3.75E-04  4.65E-08
137.     2.4061E+04 227.27 11096.35  2.70E-04  4.51E-08
138.     2.2587E+04 223.78 11513.69  1.89E-04  4.43E-08
139.     2.1195E+04 220.31 11926.96  1.26E-04  4.71E-08
140.     1.9882E+04 216.94 12336.19  8.19E-05  4.96E-08
141.     1.8643E+04 213.32 12741.18  5.94E-05  4.80E-08
142.     1.7475E+04 209.90 13141.86  3.96E-05  5.31E-08
143.     1.6375E+04 207.85 13539.61  2.72E-05  9.51E-08
144.     1.5337E+04 206.91 13936.76  1.06E-05  1.16E-07
145.     1.4360E+04 205.45 14333.88  4.64E-06  1.15E-07
146.     1.3441E+04 203.53 14729.99  3.71E-06  1.37E-07
147.     1.2575E+04 201.98 15125.00  3.41E-06  2.02E-07
148.     1.1760E+04 199.74 15519.06  3.48E-06  2.91E-07
149.     1.0989E+04 197.36 15912.80  3.51E-06  3.46E-07
150.     1.0258E+04 196.71 16309.78  3.19E-06  4.26E-07
151.     9.5618E+03 196.94 16714.78  2.94E-06  5.47E-07
152.     8.8959E+03 196.94 17130.80  2.88E-06  7.00E-07
153.     8.2573E+03 197.86 17561.18  2.83E-06  9.02E-07
154.     7.6429E+03 199.41 18010.64  2.83E-06  1.06E-06
155.     7.0506E+03 200.54 18482.73  2.93E-06  1.21E-06
156.     6.4798E+03 201.99 18980.02  3.09E-06  1.27E-06
157.     5.9315E+03 203.57 19504.69  3.08E-06  1.15E-06
158.     5.4070E+03 205.51 20058.87  2.73E-06  8.33E-07
159.     4.9073E+03 209.41 20647.60  2.68E-06  8.14E-07
160.     4.4331E+03 212.32 21274.78  3.13E-06  1.24E-06
161.     3.9848E+03 214.68 21940.91  3.29E-06  1.64E-06
162.     3.5628E+03 215.32 22645.25  3.31E-06  1.80E-06
163.     3.1674E+03 216.56 23388.76  3.40E-06  2.31E-06
164.     2.7986E+03 217.74 24175.39  3.49E-06  3.20E-06
165.     2.4565E+03 219.17 25008.87  3.56E-06  4.36E-06
166.     2.1410E+03 220.43 25893.16  3.60E-06  5.54E-06
167.     1.8518E+03 221.35 26831.34  3.67E-06  6.81E-06
168.     1.5884E+03 223.48 27829.87  3.71E-06  8.09E-06
169.     1.3504E+03 225.96 28897.71  3.72E-06  9.12E-06
170.     1.1369E+03 228.41 30041.95  3.79E-06  9.85E-06
171.     9.4706E+02 232.50 31274.04  3.84E-06  1.04E-05
172.     7.7988E+02 237.28 32609.15  4.17E-06  1.03E-05
173.     6.3417E+02 239.76 34052.89  4.50E-06  1.00E-05
174.     5.0860E+02 242.46 35609.80  4.67E-06  9.29E-06
175.     4.0176E+02 249.00 37305.72  4.82E-06  7.84E-06
176.     3.1209E+02 253.53 39162.82  4.91E-06  6.44E-06
177.     2.3800E+02 258.29 41192.73  5.06E-06  5.13E-06
178.     1.7781E+02 263.13 43417.12  5.30E-06  4.06E-06
179.     1.2985E+02 267.62 45858.44  5.55E-06  3.30E-06
180.     9.2442E+01 268.18 48522.65  5.81E-06  2.69E-06
181.     6.3957E+01 267.50 51410.10  6.07E-06  2.21E-06
182.     4.2850E+01 262.93 54518.82  6.28E-06  1.91E-06
183.     2.7683E+01 254.42 57826.10  6.40E-06  1.53E-06
184.     1.7161E+01 240.48 61289.07  6.44E-06  1.16E-06
185.     1.0148E+01 224.94 64867.20  6.44E-06  7.50E-07
186.     5.6840E+00 210.51 68560.35  6.07E-06  4.00E-07
187.     2.9904E+00 199.10 72409.85  5.20E-06  2.04E-07
188.     1.0000E+00 186.87 78596.51  4.28E-06  1.46E-07

每组数据前面都有一个标题行(<Matrix nrows="92" ncols="5">)。
每组数据包含92行、5列,类似的剖面(profile)共有5000个。我想先求所有剖面第一行(即文件中的第3、97……行)的平均值,再求所有剖面第二行的平均值,依此类推,直到第92行为止。我使用以下代码来执行此操作:

import numpy as np
# Question's attempt: for every row offset, fetch that line from several
# 94-line groups of matrix.xml by line number and average the five columns.
# NOTE(review): `linecache` is used below, but no `import linecache` is
# visible in this snippet -- confirm it is imported elsewhere.
lat=range(0,9)
# NOTE(review): range(3,94) stops at 93, so offset 94 is never visited.
add=range(3,94)
priori_p=[]
priori_t=[]
priori_z=[]
priori_H2O_vmr=[]
priori_O3_vmr=[]
# NOTE(review): input_file and output_file are opened here but neither is
# read from nor written to in the lines shown.
with open('matrix.xml', 'r')as     input_file,open('priori.xml', 'w') as output_file:
    for a in add:
        for b in lat :   
            # File line number of offset `a` inside the b-th 94-line group.
            l=(94*b)+a
            lines = linecache.getline('matrix.xml',l)
            lines=lines.split()
            # The five whitespace-separated fields are appended as strings.
            priori_p.append(lines[0])
            priori_t.append(lines[1])
            priori_z.append(lines[2])
            priori_H2O_vmr.append(lines[3])
            priori_O3_vmr.append(lines[4])
            print lines
        # NOTE(review): the lists are never cleared between values of `a`,
        # so each mean covers everything appended so far, and the elements
        # are still strings when np.mean receives them.
        mean_p=np.mean(priori_p)
        mean_t=np.mean(priori_t)
        mean_z=np.mean(priori_z)
        mean_H2O_vmr=np.mean(priori_H2O_vmr)
        mean_O3_vmr=np.mean(priori_O3_vmr)

我已经参考之前的类似问题,尝试了很多读取文件的方法(linecache、readlines 等),但都没有成功。上面的代码是我在发布这个问题之前尝试的最后一种方法。无论使用哪种方法,打印 lines 时输出都是空的。我的逻辑有什么问题吗?请帮忙。以下是我的输入文件的链接:

https://1drv.ms/u/s!AmPNuP3pNnN8hhdee9xJ7a81FA_p

1 个答案:

答案 0 :(得分:1)

以下是解析数据的一种方法。它查找开始和结束标记,并依次使用生成器yield每个numpy数组。因此,要使用数据,您需要使用循环或其他类似的构造。

代码:

def from_my_data(file_stream, dtype=None):
    """Yield one numpy array per ``<Matrix ...> ... </Matrix>`` section.

    The input is scanned line by line with a three-state machine:
    ``None`` (outside any ``<Array`` element), ``'Array'`` (inside an array
    but between matrices) and ``'Matrix'`` (collecting the rows of a matrix).
    A tag is recognised by comparing the first whitespace-separated token of
    a line, so every tag must start its own line and opening tags must be
    followed by whitespace (``<Array type=...``, ``<Matrix nrows=...``).

    Args:
        file_stream: Any iterable of text lines, e.g. an open file or an
            ``io.StringIO``.
        dtype: Optional dtype forwarded to ``np.array``. The default
            ``None`` lets numpy infer it, which keeps the tokens as the
            strings that were read; pass ``float`` to obtain numeric arrays
            that can be averaged directly.

    Yields:
        An array built from the rows of one matrix, in file order. Rows that
        follow a ``<Matrix`` tag without a closing ``</Matrix>`` are never
        yielded.
    """
    state = None
    rows = []
    for line in file_stream:
        tokens = line.split()
        if not tokens:
            # Blank line: nothing to classify.
            continue

        tag = tokens[0]
        if state == 'Matrix':
            if tag == '</Matrix>':
                # Matrix finished: hand it out and start a fresh buffer.
                state = 'Array'
                yield np.array(rows, dtype=dtype)
                rows = []
            else:
                rows.append(tokens)

        elif state == 'Array':
            if tag == '<Matrix':
                state = 'Matrix'
            elif tag == '</Array>':
                state = None

        elif tag == '<Array':
            # state is None here: wait for the enclosing array to open.
            state = 'Array'

使用:

上面的函数需要一个文件流。 (注意:我没有测试这部分。)

with open('matrix.xml', 'r') as input_file:
    for array in from_my_data(input_file):
        # process array
        # (placeholder statement: a loop body holding only a comment is a
        # syntax error, so replace `pass` with the real per-matrix work)
        pass

测试数据:

from io import StringIO
# In-memory stand-in for matrix.xml: one <Array> element wrapping three
# <Matrix> sections of five rows each, using rows taken from the sample data.
data_file = StringIO(u"""
    <Array type="Matrix" nelem="2037">
    <Matrix nrows="92" ncols="5">
     1.0144E+05 296.34    24.34  2.36E-02  9.18E-09
     1.0132E+05 296.12    34.62  2.34E-02  9.18E-09
     5.6840E+00 210.65 68225.23  6.13E-06  4.00E-07
     2.9904E+00 196.66 72053.01  5.13E-06  2.16E-07
     1.0000E+00 183.12 78140.44  4.08E-06  1.51E-07
    </Matrix>
    <Matrix nrows="92" ncols="5">
     1.0158E+05 294.49     0.17  1.89E-02  2.14E-08
     1.0146E+05 294.34    10.39  1.82E-02  2.14E-08
     1.0117E+05 294.07    35.13  1.79E-02  3.07E-08
     1.0077E+05 293.72    69.49  1.78E-02  3.68E-08
     5.5638E+04 271.04  4984.32  2.22E-03  5.91E-08
    </Matrix>
    <Matrix nrows="92" ncols="5">
     1.0077E+05 293.72    69.49  1.78E-02  3.68E-08
     6.4357E+04 277.96  3815.91  7.07E-03  6.21E-08
     6.1468E+04 275.34  4187.77  6.06E-03  6.18E-08
     5.8557E+04 273.19  4577.20  3.75E-03  6.04E-08
     5.5638E+04 271.04  4984.32  2.22E-03  5.91E-08
    </Matrix>
    </Array>
""")

测试代码:

import numpy as np

# Print every parsed matrix, each one preceded by a separator line.
for matrix in from_my_data(data_file):
    print('---')
    print(matrix)

结果:

---
[[u'1.0144E+05' u'296.34' u'24.34' u'2.36E-02' u'9.18E-09']
 [u'1.0132E+05' u'296.12' u'34.62' u'2.34E-02' u'9.18E-09']
 [u'5.6840E+00' u'210.65' u'68225.23' u'6.13E-06' u'4.00E-07']
 [u'2.9904E+00' u'196.66' u'72053.01' u'5.13E-06' u'2.16E-07']
 [u'1.0000E+00' u'183.12' u'78140.44' u'4.08E-06' u'1.51E-07']]
---
[[u'1.0158E+05' u'294.49' u'0.17' u'1.89E-02' u'2.14E-08']
 [u'1.0146E+05' u'294.34' u'10.39' u'1.82E-02' u'2.14E-08']
 [u'1.0117E+05' u'294.07' u'35.13' u'1.79E-02' u'3.07E-08']
 [u'1.0077E+05' u'293.72' u'69.49' u'1.78E-02' u'3.68E-08']
 [u'5.5638E+04' u'271.04' u'4984.32' u'2.22E-03' u'5.91E-08']]
---
[[u'1.0077E+05' u'293.72' u'69.49' u'1.78E-02' u'3.68E-08']
 [u'6.4357E+04' u'277.96' u'3815.91' u'7.07E-03' u'6.21E-08']
 [u'6.1468E+04' u'275.34' u'4187.77' u'6.06E-03' u'6.18E-08']
 [u'5.8557E+04' u'273.19' u'4577.20' u'3.75E-03' u'6.04E-08']
 [u'5.5638E+04' u'271.04' u'4984.32' u'2.22E-03' u'5.91E-08']]