我试图将数据从xml文件读入python, 我的文件看起来像这样,
1. <Array type="Matrix" nelem="2037">
2. <Matrix nrows="92" ncols="5">
3. 1.0144E+05 296.34 24.34 2.36E-02 9.18E-09
4. 1.0132E+05 296.12 34.62 2.34E-02 9.18E-09
5. 1.0103E+05 295.85 59.50 2.32E-02 9.61E-09
6. 1.0063E+05 295.51 94.08 2.30E-02 1.05E-08
7. 1.0010E+05 295.07 139.65 2.29E-02 1.24E-08
8. 9.9444E+04 294.51 196.23 2.28E-02 1.49E-08
9. 9.8676E+04 293.95 263.01 2.27E-02 1.70E-08
10. 9.7779E+04 293.36 341.50 2.23E-02 1.94E-08
11. 9.6727E+04 292.95 434.30 2.22E-02 2.27E-08
12. 9.5517E+04 292.28 542.09 2.20E-02 2.59E-08
13. 9.4150E+04 291.50 665.24 2.12E-02 2.70E-08
14. 9.2626E+04 290.83 804.34 2.08E-02 2.64E-08
15. 9.0945E+04 290.02 960.00 2.04E-02 2.52E-08
16. 8.9111E+04 289.29 1132.66 2.00E-02 2.51E-08
17. 8.7133E+04 288.48 1322.50 1.96E-02 2.61E-08
18. 8.5016E+04 287.48 1529.73 1.92E-02 2.68E-08
19. 8.2769E+04 286.42 1754.71 1.84E-02 2.77E-08
20. 8.0399E+04 285.76 1997.90 1.74E-02 2.91E-08
21. 7.7920E+04 285.16 2259.60 1.62E-02 3.14E-08
22. 7.5342E+04 284.11 2539.80 1.53E-02 3.40E-08
23. 7.2678E+04 282.85 2838.57 1.37E-02 3.46E-08
24. 6.9937E+04 281.48 3155.99 1.15E-02 3.30E-08
25. 6.7135E+04 279.79 3491.77 9.44E-03 3.06E-08
26. 6.4287E+04 277.74 3845.49 7.14E-03 3.38E-08
27. 6.1403E+04 276.48 4217.67 2.80E-03 5.91E-08
28. 5.8497E+04 274.86 4608.78 1.21E-03 6.84E-08
29. 5.5584E+04 271.76 5017.35 3.52E-03 4.09E-08
30. 5.2686E+04 268.40 5440.57 4.30E-03 2.86E-08
31. 4.9832E+04 265.81 5875.99 2.99E-03 2.45E-08
32. 4.7049E+04 263.28 6320.95 1.35E-03 1.88E-08
33. 4.4363E+04 260.75 6771.68 5.27E-04 1.42E-08
34. 4.1799E+04 257.97 7223.64 6.16E-04 1.30E-08
35. 3.9366E+04 255.02 7673.76 8.03E-04 1.43E-08
36. 3.7061E+04 251.94 8121.31 1.17E-03 1.72E-08
37. 3.4879E+04 248.88 8566.01 1.27E-03 1.74E-08
38. 3.2814E+04 245.60 9007.64 8.27E-04 1.53E-08
39. 3.0860E+04 242.06 9445.71 4.83E-04 1.20E-08
40. 2.9012E+04 238.35 9879.78 3.83E-04 9.68E-09
41. 2.7265E+04 234.55 10309.53 3.30E-04 9.10E-09
42. 2.5614E+04 230.80 10734.89 2.89E-04 8.65E-09
43. 2.4054E+04 227.19 11155.95 2.34E-04 8.05E-09
44. 2.2581E+04 223.68 11572.86 1.75E-04 7.38E-09
45. 2.1191E+04 220.25 11985.73 1.25E-04 7.66E-09
46. 1.9879E+04 217.01 12394.75 8.71E-05 7.49E-09
47. 1.8641E+04 213.99 12800.23 5.93E-05 2.50E-08
48. 1.7473E+04 211.29 13202.67 4.46E-05 5.99E-08
49. 1.6373E+04 208.76 13602.46 2.82E-05 7.64E-08
50. 1.5336E+04 206.22 13999.69 1.41E-05 9.54E-08
51. 1.4360E+04 203.68 14394.33 1.09E-05 1.05E-07
52. 1.3440E+04 201.70 14786.87 8.80E-06 1.34E-07
53. 1.2575E+04 200.15 15178.25 6.54E-06 1.46E-07
54. 1.1760E+04 198.40 15569.14 5.39E-06 1.68E-07
55. 1.0989E+04 196.78 15960.93 4.67E-06 2.74E-07
56. 1.0258E+04 196.25 16356.83 3.72E-06 5.86E-07
57. 9.5617E+03 196.05 16760.42 3.20E-06 1.02E-06
58. 8.8959E+03 194.61 17173.03 3.17E-06 1.25E-06
59. 8.2573E+03 196.84 17599.75 3.24E-06 1.78E-06
60. 7.6429E+03 197.60 18046.00 3.20E-06 2.03E-06
61. 7.0506E+03 197.83 18512.76 3.12E-06 2.02E-06
62. 6.4798E+03 198.96 19002.95 3.11E-06 1.95E-06
63. 5.9315E+03 201.72 19521.31 3.18E-06 1.77E-06
64. 5.4070E+03 205.16 20072.52 3.25E-06 1.36E-06
65. 4.9073E+03 208.03 20658.79 3.26E-06 7.92E-07
66. 4.4331E+03 210.27 21280.86 3.27E-06 3.69E-07
67. 3.9848E+03 211.16 21938.29 3.28E-06 2.51E-07
68. 3.5628E+03 211.81 22631.11 3.30E-06 3.40E-07
69. 3.1674E+03 212.75 23362.02 3.35E-06 9.72E-07
70. 2.7986E+03 214.17 24135.29 3.41E-06 2.31E-06
71. 2.4565E+03 215.78 24955.51 3.49E-06 4.13E-06
72. 2.1410E+03 217.31 25826.71 3.54E-06 5.75E-06
73. 1.8518E+03 218.25 26751.68 3.56E-06 6.85E-06
74. 1.5884E+03 220.18 27735.83 3.57E-06 7.84E-06
75. 1.3504E+03 224.28 28791.83 3.66E-06 8.91E-06
76. 1.1369E+03 228.51 29932.10 3.93E-06 9.87E-06
77. 9.4706E+02 230.12 31158.07 4.26E-06 1.04E-05
78. 7.7988E+02 231.07 32468.76 4.54E-06 1.05E-05
79. 6.3417E+02 233.23 33873.97 4.69E-06 1.03E-05
80. 5.0860E+02 237.07 35392.41 4.76E-06 9.53E-06
81. 4.0176E+02 242.64 37047.76 4.94E-06 8.10E-06
82. 3.1209E+02 250.37 38869.69 5.11E-06 6.37E-06
83. 2.3800E+02 256.95 40881.74 5.43E-06 4.94E-06
84. 1.7781E+02 261.15 43091.95 5.65E-06 3.89E-06
85. 1.2985E+02 263.11 45503.43 5.74E-06 3.16E-06
86. 9.2442E+01 266.27 48135.75 5.84E-06 2.52E-06
87. 6.3957E+01 267.56 51013.24 6.04E-06 2.09E-06
88. 4.2850E+01 264.55 54131.75 6.26E-06 1.81E-06
89. 2.7683E+01 255.02 57453.20 6.40E-06 1.45E-06
90. 1.7161E+01 241.49 60927.40 6.45E-06 1.09E-06
91. 1.0148E+01 226.03 64521.68 6.45E-06 7.12E-07
92. 5.6840E+00 210.65 68225.23 6.13E-06 4.00E-07
93. 2.9904E+00 196.66 72053.01 5.13E-06 2.16E-07
94. 1.0000E+00 183.12 78140.44 4.08E-06 1.51E-07
95. </Matrix>
96. <Matrix nrows="92" ncols="5">
97. 1.0158E+05 294.49 0.17 1.89E-02 2.14E-08
98. 1.0146E+05 294.34 10.39 1.82E-02 2.14E-08
99. 1.0117E+05 294.07 35.13 1.79E-02 3.07E-08
100. 1.0077E+05 293.72 69.49 1.78E-02 3.68E-08
101. 1.0024E+05 293.27 114.79 1.77E-02 3.97E-08
102. 9.9583E+04 292.72 171.05 1.77E-02 4.20E-08
103. 9.8814E+04 292.07 237.44 1.77E-02 4.39E-08
104. 9.7915E+04 291.30 315.44 1.76E-02 4.60E-08
105. 9.6861E+04 290.41 407.55 1.75E-02 4.80E-08
106. 9.5649E+04 289.84 514.48 1.53E-02 4.87E-08
107. 9.4279E+04 290.36 636.95 1.05E-02 4.99E-08
108. 9.2752E+04 290.23 775.71 9.77E-03 5.06E-08
109. 9.1067E+04 289.76 931.25 1.00E-02 5.24E-08
110. 8.9230E+04 289.17 1103.90 1.09E-02 5.59E-08
111. 8.7248E+04 288.44 1293.81 1.22E-02 5.74E-08
112. 8.5127E+04 287.46 1501.17 1.39E-02 5.66E-08
113. 8.2875E+04 286.01 1726.14 1.48E-02 5.49E-08
114. 8.0501E+04 284.37 1968.74 1.29E-02 5.49E-08
115. 7.8017E+04 284.30 2229.59 9.75E-03 5.71E-08
116. 7.5434E+04 283.44 2509.25 7.47E-03 6.05E-08
117. 7.2764E+04 282.38 2807.64 5.96E-03 6.27E-08
118. 7.0018E+04 281.58 3125.10 4.92E-03 6.16E-08
119. 6.7211E+04 280.25 3461.48 5.69E-03 6.17E-08
120. 6.4357E+04 277.96 3815.91 7.07E-03 6.21E-08
121. 6.1468E+04 275.34 4187.77 6.06E-03 6.18E-08
122. 5.8557E+04 273.19 4577.20 3.75E-03 6.04E-08
123. 5.5638E+04 271.04 4984.32 2.22E-03 5.91E-08
124. 5.2735E+04 268.30 5407.26 1.71E-03 5.79E-08
125. 4.9876E+04 264.80 5842.15 1.38E-03 5.71E-08
126. 4.7088E+04 261.36 6285.01 8.12E-04 5.68E-08
127. 4.4398E+04 258.32 6732.39 7.79E-04 5.66E-08
128. 4.1829E+04 255.49 7180.45 1.47E-03 5.16E-08
129. 3.9393E+04 252.93 7626.94 1.10E-03 5.16E-08
130. 3.7084E+04 250.55 8071.79 1.21E-03 5.00E-08
131. 3.4899E+04 247.67 8514.55 1.59E-03 4.51E-08
132. 3.2831E+04 244.13 8954.15 1.25E-03 4.52E-08
133. 3.0875E+04 240.59 9389.94 8.87E-04 4.68E-08
134. 2.9025E+04 237.16 9821.94 6.81E-04 4.55E-08
135. 2.7276E+04 233.85 10250.30 5.10E-04 4.51E-08
136. 2.5623E+04 230.59 10675.13 3.75E-04 4.65E-08
137. 2.4061E+04 227.27 11096.35 2.70E-04 4.51E-08
138. 2.2587E+04 223.78 11513.69 1.89E-04 4.43E-08
139. 2.1195E+04 220.31 11926.96 1.26E-04 4.71E-08
140. 1.9882E+04 216.94 12336.19 8.19E-05 4.96E-08
141. 1.8643E+04 213.32 12741.18 5.94E-05 4.80E-08
142. 1.7475E+04 209.90 13141.86 3.96E-05 5.31E-08
143. 1.6375E+04 207.85 13539.61 2.72E-05 9.51E-08
144. 1.5337E+04 206.91 13936.76 1.06E-05 1.16E-07
145. 1.4360E+04 205.45 14333.88 4.64E-06 1.15E-07
146. 1.3441E+04 203.53 14729.99 3.71E-06 1.37E-07
147. 1.2575E+04 201.98 15125.00 3.41E-06 2.02E-07
148. 1.1760E+04 199.74 15519.06 3.48E-06 2.91E-07
149. 1.0989E+04 197.36 15912.80 3.51E-06 3.46E-07
150. 1.0258E+04 196.71 16309.78 3.19E-06 4.26E-07
151. 9.5618E+03 196.94 16714.78 2.94E-06 5.47E-07
152. 8.8959E+03 196.94 17130.80 2.88E-06 7.00E-07
153. 8.2573E+03 197.86 17561.18 2.83E-06 9.02E-07
154. 7.6429E+03 199.41 18010.64 2.83E-06 1.06E-06
155. 7.0506E+03 200.54 18482.73 2.93E-06 1.21E-06
156. 6.4798E+03 201.99 18980.02 3.09E-06 1.27E-06
157. 5.9315E+03 203.57 19504.69 3.08E-06 1.15E-06
158. 5.4070E+03 205.51 20058.87 2.73E-06 8.33E-07
159. 4.9073E+03 209.41 20647.60 2.68E-06 8.14E-07
160. 4.4331E+03 212.32 21274.78 3.13E-06 1.24E-06
161. 3.9848E+03 214.68 21940.91 3.29E-06 1.64E-06
162. 3.5628E+03 215.32 22645.25 3.31E-06 1.80E-06
163. 3.1674E+03 216.56 23388.76 3.40E-06 2.31E-06
164. 2.7986E+03 217.74 24175.39 3.49E-06 3.20E-06
165. 2.4565E+03 219.17 25008.87 3.56E-06 4.36E-06
166. 2.1410E+03 220.43 25893.16 3.60E-06 5.54E-06
167. 1.8518E+03 221.35 26831.34 3.67E-06 6.81E-06
168. 1.5884E+03 223.48 27829.87 3.71E-06 8.09E-06
169. 1.3504E+03 225.96 28897.71 3.72E-06 9.12E-06
170. 1.1369E+03 228.41 30041.95 3.79E-06 9.85E-06
171. 9.4706E+02 232.50 31274.04 3.84E-06 1.04E-05
172. 7.7988E+02 237.28 32609.15 4.17E-06 1.03E-05
173. 6.3417E+02 239.76 34052.89 4.50E-06 1.00E-05
174. 5.0860E+02 242.46 35609.80 4.67E-06 9.29E-06
175. 4.0176E+02 249.00 37305.72 4.82E-06 7.84E-06
176. 3.1209E+02 253.53 39162.82 4.91E-06 6.44E-06
177. 2.3800E+02 258.29 41192.73 5.06E-06 5.13E-06
178. 1.7781E+02 263.13 43417.12 5.30E-06 4.06E-06
179. 1.2985E+02 267.62 45858.44 5.55E-06 3.30E-06
180. 9.2442E+01 268.18 48522.65 5.81E-06 2.69E-06
181. 6.3957E+01 267.50 51410.10 6.07E-06 2.21E-06
182. 4.2850E+01 262.93 54518.82 6.28E-06 1.91E-06
183. 2.7683E+01 254.42 57826.10 6.40E-06 1.53E-06
184. 1.7161E+01 240.48 61289.07 6.44E-06 1.16E-06
185. 1.0148E+01 224.94 64867.20 6.44E-06 7.50E-07
186. 5.6840E+00 210.51 68560.35 6.07E-06 4.00E-07
187. 2.9904E+00 199.10 72409.85 5.20E-06 2.04E-07
188. 1.0000E+00 186.87 78596.51 4.28E-06 1.46E-07
每组数据前面都有一个标题行(即 `<Matrix nrows="92" ncols="5">` 标签)。
每个集合包含92行和5列。类似地,共有5000个廓线(profile)。我想先求所有廓线第一行的平均值(即文件的第3行、第97行等),再求所有廓线第二行的平均值,依此类推,直到第92行为止,
我使用以下代码来执行此操作
import numpy as np
# Average every data row of matrix.xml over the profiles listed in `lat`.
#
# Layout taken from the sample file: line 1 is the <Array ...> tag and each
# profile then occupies 94 lines -- a <Matrix ...> tag, 92 data rows of
# 5 columns, and the closing </Matrix> tag.
lat = range(0, 9)    # zero-based indices of the profiles to average over
# 1-based file line numbers of the 92 data rows of the first profile
# (lines 3..94).  range() excludes its end, so the bound has to be 95;
# range(3, 94) dropped the last row.
add = range(3, 95)

rows = []
with open('matrix.xml', 'r') as input_file, open('priori.xml', 'w') as output_file:
    # NOTE: priori.xml is only opened (and therefore truncated), exactly as
    # before; nothing is written to it yet.
    # Read through the handle that is already open.  linecache was never
    # imported, and linecache.getline() returns '' instead of raising when
    # a line cannot be read, which hides the real problem.
    all_lines = input_file.readlines()
    for a in add:
        same_row = []    # row `a` taken from every selected profile
        for b in lat:
            line_no = (94 * b) + a
            if line_no > len(all_lines):
                raise ValueError(
                    'matrix.xml has only %d lines, line %d was requested'
                    % (len(all_lines), line_no))
            lines = all_lines[line_no - 1].split()
            if len(lines) != 5:
                raise ValueError(
                    'line %d of matrix.xml does not hold 5 values: %r'
                    % (line_no, lines))
            print(lines)
            # The fields are text; np.mean needs numbers.
            same_row.append([float(value) for value in lines])
        rows.append(same_row)

# Axes: (data row, profile, column).
priori = np.array(rows)
priori_p = priori[:, :, 0]
priori_t = priori[:, :, 1]
priori_z = priori[:, :, 2]
priori_H2O_vmr = priori[:, :, 3]
priori_O3_vmr = priori[:, :, 4]

# One mean per data row, taken across the profiles.
mean_p = np.mean(priori_p, axis=1)
mean_t = np.mean(priori_t, axis=1)
mean_z = np.mean(priori_z, axis=1)
mean_H2O_vmr = np.mean(priori_H2O_vmr, axis=1)
mean_O3_vmr = np.mean(priori_O3_vmr, axis=1)
我已经尝试了很多阅读文件的方法,参考前面提到的问题,没有一个帮助过我,我尝试过linecache,readlines等等。代码有我在发布这个问题之前尝试过的最后一个方法。 在我打印'lines'的所有情况下,输出都是空白的 我的逻辑有什么问题吗? 请帮忙, 以下是我输入文件的链接
答案 0 :(得分:1)
以下是解析数据的一种方法。它查找开始和结束标记,并依次使用生成器yield
每个numpy数组。因此,要使用数据,您需要使用循环或其他类似的构造。
**代码:**
def from_my_data(file_stream, dtype=None):
    """Yield one numpy array per ``<Matrix>`` element read from *file_stream*.

    The input is consumed line by line by a three-state scanner: outside
    any ``<Array>`` (state ``None``), inside an ``<Array>`` but between
    matrices (``'Array'``), and inside a ``<Matrix>`` (``'Matrix'``).
    Tags are recognised by the first whitespace-separated token of a line,
    so each tag has to sit on a line of its own.

    Args:
        file_stream: Iterable of text lines, e.g. an open file.
        dtype: Optional dtype handed to ``np.array``.  The default
            (``None``) leaves the fields as strings; pass ``float`` to
            get numeric arrays that can be averaged directly.

    Yields:
        An array built from the rows collected between a ``<Matrix`` line
        and the matching ``</Matrix>`` line, in file order.  Rows still
        pending when the input ends without ``</Matrix>`` are not yielded.
    """
    state = None
    data = []
    for line in (raw.strip().split() for raw in file_stream):
        if not line:
            # skip empty lines
            continue
        elif state == 'Matrix':
            if line[0] == '</Matrix>':
                state = 'Array'
                yield np.array(data, dtype=dtype)
                data = []
            else:
                data.append(line)
        elif state == 'Array':
            # Accept the tag with or without attributes.
            if line[0] in ('<Matrix', '<Matrix>'):
                state = 'Matrix'
            elif line[0] == '</Array>':
                state = None
        elif state is None:
            if line[0] in ('<Array', '<Array>'):
                state = 'Array'
使用:
上面的函数需要一个文件流。 (注意:我没有测试这部分。)
with open('matrix.xml', 'r') as input_file:
    for array in from_my_data(input_file):
        # process array -- the fields are still strings at this point, so
        # convert them before doing any arithmetic
        values = array.astype(float)
测试数据:
from io import StringIO
# In-memory stand-in for matrix.xml, so the parser can be tried without a
# file on disk.  It holds three <Matrix> blocks of five rows each inside one
# <Array>; the nrows/nelem attributes are left over from the full file and
# do not match these shortened blocks.
data_file = StringIO(u"""
<Array type="Matrix" nelem="2037">
<Matrix nrows="92" ncols="5">
1.0144E+05 296.34 24.34 2.36E-02 9.18E-09
1.0132E+05 296.12 34.62 2.34E-02 9.18E-09
5.6840E+00 210.65 68225.23 6.13E-06 4.00E-07
2.9904E+00 196.66 72053.01 5.13E-06 2.16E-07
1.0000E+00 183.12 78140.44 4.08E-06 1.51E-07
</Matrix>
<Matrix nrows="92" ncols="5">
1.0158E+05 294.49 0.17 1.89E-02 2.14E-08
1.0146E+05 294.34 10.39 1.82E-02 2.14E-08
1.0117E+05 294.07 35.13 1.79E-02 3.07E-08
1.0077E+05 293.72 69.49 1.78E-02 3.68E-08
5.5638E+04 271.04 4984.32 2.22E-03 5.91E-08
</Matrix>
<Matrix nrows="92" ncols="5">
1.0077E+05 293.72 69.49 1.78E-02 3.68E-08
6.4357E+04 277.96 3815.91 7.07E-03 6.21E-08
6.1468E+04 275.34 4187.77 6.06E-03 6.18E-08
5.8557E+04 273.19 4577.20 3.75E-03 6.04E-08
5.5638E+04 271.04 4984.32 2.22E-03 5.91E-08
</Matrix>
</Array>
""")
测试代码:
import numpy as np
# Print every parsed matrix, each preceded by a separator line.
for array in from_my_data(data_file):
    print('---')
    print(array)
**结果:**
---
[[u'1.0144E+05' u'296.34' u'24.34' u'2.36E-02' u'9.18E-09']
[u'1.0132E+05' u'296.12' u'34.62' u'2.34E-02' u'9.18E-09']
[u'5.6840E+00' u'210.65' u'68225.23' u'6.13E-06' u'4.00E-07']
[u'2.9904E+00' u'196.66' u'72053.01' u'5.13E-06' u'2.16E-07']
[u'1.0000E+00' u'183.12' u'78140.44' u'4.08E-06' u'1.51E-07']]
---
[[u'1.0158E+05' u'294.49' u'0.17' u'1.89E-02' u'2.14E-08']
[u'1.0146E+05' u'294.34' u'10.39' u'1.82E-02' u'2.14E-08']
[u'1.0117E+05' u'294.07' u'35.13' u'1.79E-02' u'3.07E-08']
[u'1.0077E+05' u'293.72' u'69.49' u'1.78E-02' u'3.68E-08']
[u'5.5638E+04' u'271.04' u'4984.32' u'2.22E-03' u'5.91E-08']]
---
[[u'1.0077E+05' u'293.72' u'69.49' u'1.78E-02' u'3.68E-08']
[u'6.4357E+04' u'277.96' u'3815.91' u'7.07E-03' u'6.21E-08']
[u'6.1468E+04' u'275.34' u'4187.77' u'6.06E-03' u'6.18E-08']
[u'5.8557E+04' u'273.19' u'4577.20' u'3.75E-03' u'6.04E-08']
[u'5.5638E+04' u'271.04' u'4984.32' u'2.22E-03' u'5.91E-08']]