我有一个文件,其中列表跨越多行 - 列表的长度为常量。但是,在每一行中,元素的数量可以变化。 如何在Python中读取此文件以读取整个列表?
编辑:更喜欢非正则表达式解决方案。
看起来像这样的文件(仅用于说明):
[ -6.70031086e-02 5.93684241e-02 1.11689426e-01 1.16174825e-01
-3.74981388e-02 4.05267589e-02 2.02941950e-02 1.65661901e-01
9.88883078e-02 -1.86108038e-01 -2.09761858e-01 2.08867267e-02
-7.34964982e-02 -1.38626635e-01 1.33853648e-02 -1.11527992e-02
7.19301552e-02 5.71861453e-02 -8.56672525e-02 8.01878721e-02
-2.27990234e-03 8.93531218e-02 -7.99949542e-02 -3.89122330e-02
3.07365637e-02 -1.14912149e-02 -1.25382066e-01 1.61550958e-02
-9.03828740e-02 -8.40659663e-02 2.35458408e-02 6.62269741e-02
-6.83306251e-03 3.86000201e-02 -2.85124127e-02 -1.22550033e-01
6.14493713e-02 5.42194061e-02 -9.98141840e-02 3.87526527e-02
-1.77935660e-02 6.59185136e-03 -7.56490007e-02 -8.04342143e-03
4.22548652e-02 -4.90937680e-02 7.31833130e-02 4.60098870e-02
-3.38455513e-02 7.72312284e-02 1.69506043e-01 8.54071528e-02
-5.15969582e-02 -8.66574422e-02 2.78513003e-02 -8.26551542e-02
5.72918989e-02 -8.63238499e-02 -1.09750973e-02 -1.04178898e-01
4.04170994e-03 7.16830865e-02 1.16529778e-01 1.65875465e-01
1.82720050e-02 1.71985731e-01 -2.09263922e-03 -3.31376195e-02
1.26107544e-01 1.47209521e-02 -1.41869476e-02 5.07163629e-02
1.49011686e-01 9.49593708e-02 4.67912182e-02 -8.64533633e-02
4.12282310e-02 8.19735080e-02 1.49312839e-02 2.14010417e-01
1.43005610e-01 -6.68876693e-02 1.25497788e-01 -8.12855735e-02
1.89039335e-02 -7.57512003e-02 4.25233506e-02 -6.90079033e-02
8.08808357e-02 -3.47024412e-03 2.63141114e-02 1.61882326e-01
1.25483396e-02 1.45484000e-01 3.12147997e-02 5.61049813e-03
-1.52215753e-02 -9.00566354e-02 7.78550655e-02 2.32269196e-03
6.35183901e-02 -1.34039536e-01 1.12368152e-01 -5.65479957e-02
-1.40751451e-01 -3.24242609e-03 -2.60595884e-02 -3.79961394e-02
9.53520015e-02 1.18161231e-01 -6.31203428e-02 6.54687434e-02
-8.70579779e-02 1.64551754e-02 -4.66874018e-02 -2.02252846e-02
1.81142420e-01 -4.29894254e-02 8.62734243e-02 -1.96067482e-01
-5.18136062e-02 -1.02697751e-02 -8.20104256e-02 -7.04407394e-02
-1.37479603e-01 1.51444465e-01 1.46553725e-01 6.87731877e-02]
[ 0.13552369 -0.05061625 0.13381879 -0.09299553 -0.10647763 -0.02260791
0.00843107 0.01909993 0.0252617 -0.09204189 0.11444099 0.16380875
-0.26470438 0.04185624 0.08701419 -0.00960395 0.03196884 0.05695887
0.03903539 0.0330128 0.0088141 0.03981387 -0.2256397 0.1373885
-0.00823926 -0.23756374 0.14071368 0.15679301 0.05020505 0.00083234
0.14197688 -0.17108534 -0.03471961 -0.09328505 0.04228394 0.07565336
-0.06243521 -0.09347741 -0.00821514 -0.06649745 0.05205032 -0.00554045
-0.00386953 0.05514322 -0.0234912 -0.11922046 0.14259741 -0.04250529
0.02933454 0.09837652 -0.04943179 -0.01795183 0.11347186 -0.0262726
0.14694421 0.00120262 0.02876565 0.06762701 -0.06783341 -0.0130248
0.0304249 0.04527348 0.15238339 0.01605285 0.02574495 0.03512112
-0.05733667 -0.09585288 0.05414675 0.14885603 -0.02176115 -0.11798949
0.10624658 0.04126133 0.0355645 -0.0176413 0.01316 -0.0731855
0.06095812 -0.03693416 0.05717857 -0.06640249 0.02760602 -0.11397229
-0.08891453 -0.05422837 -0.00309273 -0.08528782 0.04416328 0.10460843
0.08477673 -0.03460682 0.26425052 0.027636 -0.01395808 -0.04762371
-0.11365297 -0.09291256 0.02920797 0.1462263 -0.1354932 -0.00904074
0.16209167 -0.0351855 0.0287815 0.082674 0.03369482 -0.04522609
0.01189264 -0.03094579 -0.1829372 -0.0331573 0.03074961 -0.01479802
-0.06882931 -0.02879945 0.04064524 0.1048708 0.11631119 -0.13730904
-0.01107442 0.07329052 0.013919 0.02282012 0.14160685 -0.08278389
0.04416744 0.17811519 0.06306098 -0.15048456 -0.08337893 0.06718753
0.02712255 0.0626005 0.05940831 0.08399926 0.22958109 -0.06148282
-0.05348093 -0.05489948 0.18494032 -0.01777483 0.03008986 0.03045709
-0.09592026 0.17701676 -0.21119906 -0.01997624 0.15930974 -0.03315869 ]
答案 0 :(得分:2)
import re

# Non-greedy body, so each "[ ... ]" group is matched on its own. With re.S
# a greedy ".*" would run from the first "[" to the very last "]" and merge
# every list in the file into a single match. The capture group holds only
# the text between the brackets.
_BRACKETED = re.compile(r'\[(.*?)\]', re.S)


def parse_bracketed_lists(text):
    """Return the contents of every ``[ ... ]`` group found in *text*.

    A group may span several lines and hold any number of
    whitespace-separated items per line.

    Args:
        text: The full contents of the input file.

    Returns:
        A list of lists, one inner list per bracketed group, in order of
        appearance. Items are kept as strings exactly as written in
        *text*; map ``float`` over them if numbers are needed.
    """
    # Splitting only the captured interior means a bracket that touches a
    # number (e.g. "6.87731877e-02]") can never end up inside a token.
    return [body.split() for body in _BRACKETED.findall(text)]


if __name__ == "__main__":
    # "with" closes the file even if reading raises.
    with open("lst", "r") as f:
        s = f.read()

    lst = parse_bracketed_lists(s)
    print(lst)
`lst` 是从您的文件中读取的、由多个列表组成的列表(list of lists)。
答案 1 :(得分:2)
我写了这段代码作为你要求的问题的解决方案。如果这对你有用,请告诉我。提前谢谢!
def read_numbers(lines):
    """Collect every whitespace-separated item from *lines*, minus brackets.

    Args:
        lines: An iterable of text lines (an open file works).

    Returns:
        A single flat list of the items as strings, in reading order.
        NOTE: boundaries between separate ``[ ... ]`` lists are not kept;
        all items end up in the same list.
    """
    numbers = []
    for line in lines:
        for token in line.split():
            # Remove brackets from both ends so "[1.0", "1.0]" and "[1.0]"
            # all reduce to "1.0".
            token = token.strip('[]')
            # A bracket standing alone (as in "[ -6.7e-02" or "0.03 ]")
            # reduces to an empty string; skip it instead of storing ''.
            if token:
                numbers.append(token)
    return numbers


if __name__ == "__main__":
    # "with" closes the file even if reading raises.
    with open('sample.txt', 'r') as f:
        y = read_numbers(f)
    print(y)
答案 2 :(得分:2)
如果你想要浮点数(float),不需要正则表达式,只需对每行做 strip 和 split,再映射为 float:
def sections(path="in.txt"):
    """Yield each bracketed list in the file as a list of per-line rows.

    The file holds one or more ``[ ... ]`` lists of whitespace-separated
    numbers, each list spanning any number of lines.

    Args:
        path: File to read. Defaults to ``"in.txt"``.

    Yields:
        One list per bracketed list in the file. Each is a list of rows,
        and each row is the list of floats found on one line.

    Raises:
        ValueError: If an item on a line cannot be converted by ``float``.
    """
    with open(path) as f:
        rows = []
        for line in f:
            stripped = line.strip()
            row = [float(tok) for tok in stripped.strip("[]").split()]
            # Blank lines and lines holding only a bracket carry no numbers.
            if row:
                rows.append(row)
            # The row must be stored BEFORE yielding: the line that carries
            # the closing "]" belongs to the list it closes, not the next.
            if stripped.endswith("]"):
                yield rows
                rows = []
        # Rows after the last "]" (an unterminated list) are still emitted
        # rather than silently dropped.
        if rows:
            yield rows
# Materialize every section from the generator, then pretty-print the
# nested result.
from pprint import pprint as pp

all_sections = list(sections())
pp(all_sections)
输出:
[[[-0.0670031086, 0.0593684241, 0.111689426, 0.116174825],
[-0.0374981388, 0.0405267589, 0.020294195, 0.165661901],
[0.0988883078, -0.186108038, -0.209761858, 0.0208867267],
[-0.0734964982, -0.138626635, 0.0133853648, -0.0111527992],
[0.0719301552, 0.0571861453, -0.0856672525, 0.0801878721],
[-0.00227990234, 0.0893531218, -0.0799949542, -0.038912233],
[0.0307365637, -0.0114912149, -0.125382066, 0.0161550958],
[-0.090382874, -0.0840659663, 0.0235458408, 0.0662269741],
[-0.00683306251, 0.0386000201, -0.0285124127, -0.122550033],
[0.0614493713, 0.0542194061, -0.099814184, 0.0387526527],
[-0.017793566, 0.00659185136, -0.0756490007, -0.00804342143],
[0.0422548652, -0.049093768, 0.073183313, 0.046009887],
[-0.0338455513, 0.0772312284, 0.169506043, 0.0854071528],
[-0.0515969582, -0.0866574422, 0.0278513003, -0.0826551542],
[0.0572918989, -0.0863238499, -0.0109750973, -0.104178898],
[0.00404170994, 0.0716830865, 0.116529778, 0.165875465],
[0.018272005, 0.171985731, -0.00209263922, -0.0331376195],
[0.126107544, 0.0147209521, -0.0141869476, 0.0507163629],
[0.149011686, 0.0949593708, 0.0467912182, -0.0864533633],
[0.041228231, 0.081973508, 0.0149312839, 0.214010417],
[0.14300561, -0.0668876693, 0.125497788, -0.0812855735],
[0.0189039335, -0.0757512003, 0.0425233506, -0.0690079033],
[0.0808808357, -0.00347024412, 0.0263141114, 0.161882326],
[0.0125483396, 0.145484, 0.0312147997, 0.00561049813],
[-0.0152215753, -0.0900566354, 0.0778550655, 0.00232269196],
[0.0635183901, -0.134039536, 0.112368152, -0.0565479957],
[-0.140751451, -0.00324242609, -0.0260595884, -0.0379961394],
[0.0953520015, 0.118161231, -0.0631203428, 0.0654687434],
[-0.0870579779, 0.0164551754, -0.0466874018, -0.0202252846],
[0.18114242, -0.0429894254, 0.0862734243, -0.196067482],
[-0.0518136062, -0.0102697751, -0.0820104256, -0.0704407394]],
[[-0.137479603, 0.151444465, 0.146553725, 0.0687731877],
[0.13552369,
-0.05061625,
0.13381879,
-0.09299553,
-0.10647763,
-0.02260791],
[0.00843107, 0.01909993, 0.0252617, -0.09204189, 0.11444099, 0.16380875],
[-0.26470438, 0.04185624, 0.08701419, -0.00960395, 0.03196884, 0.05695887],
[0.03903539, 0.0330128, 0.0088141, 0.03981387, -0.2256397, 0.1373885],
[-0.00823926, -0.23756374, 0.14071368, 0.15679301, 0.05020505, 0.00083234],
[0.14197688, -0.17108534, -0.03471961, -0.09328505, 0.04228394, 0.07565336],
[-0.06243521,
-0.09347741,
-0.00821514,
-0.06649745,
0.05205032,
-0.00554045],
[-0.00386953, 0.05514322, -0.0234912, -0.11922046, 0.14259741, -0.04250529],
[0.02933454, 0.09837652, -0.04943179, -0.01795183, 0.11347186, -0.0262726],
[0.14694421, 0.00120262, 0.02876565, 0.06762701, -0.06783341, -0.0130248],
[0.0304249, 0.04527348, 0.15238339, 0.01605285, 0.02574495, 0.03512112],
[-0.05733667,
-0.09585288,
0.05414675,
0.14885603,
-0.02176115,
-0.11798949],
[0.10624658, 0.04126133, 0.0355645, -0.0176413, 0.01316, -0.0731855],
[0.06095812, -0.03693416, 0.05717857, -0.06640249, 0.02760602, -0.11397229],
[-0.08891453,
-0.05422837,
-0.00309273,
-0.08528782,
0.04416328,
0.10460843],
[0.08477673, -0.03460682, 0.26425052, 0.027636, -0.01395808, -0.04762371],
[-0.11365297, -0.09291256, 0.02920797, 0.1462263, -0.1354932, -0.00904074],
[0.16209167, -0.0351855, 0.0287815, 0.082674, 0.03369482, -0.04522609],
[0.01189264, -0.03094579, -0.1829372, -0.0331573, 0.03074961, -0.01479802],
[-0.06882931, -0.02879945, 0.04064524, 0.1048708, 0.11631119, -0.13730904],
[-0.01107442, 0.07329052, 0.013919, 0.02282012, 0.14160685, -0.08278389],
[0.04416744, 0.17811519, 0.06306098, -0.15048456, -0.08337893, 0.06718753],
[0.02712255, 0.0626005, 0.05940831, 0.08399926, 0.22958109, -0.06148282],
[-0.05348093, -0.05489948, 0.18494032, -0.01777483, 0.03008986, 0.03045709]]]
如果要存储数组,可以考虑使用 numpy.save 或 pickle 等。以当前格式存储可能不是最佳选择。
答案 3 :(得分:2)
这是另一种解决方案:
def parse_lists_long(text):
    """Parse every ``[ ... ]`` list in *text*, one transformation per step.

    Args:
        text: The full contents of the input file.

    Returns:
        A list of lists of floats, one inner list per bracketed list.
        Anything after the last ``]`` is discarded.

    Raises:
        ValueError: If an item cannot be converted by ``float``.
    """
    lists = text.split(']')
    lists = lists[:-1]  # drop the piece after the last ']' (empty because of split)
    lists = [i.strip() for i in lists]  # remove surrounding spaces, tabs and newlines
    lists = [i.strip('[') for i in lists]  # remove the '[' left at the start of every piece
    lists = [i.split() for i in lists]  # split every piece into its items
    lists = [[float(j) for j in i] for i in lists]  # convert strings to numbers
    return lists


def parse_lists_short(text):
    """Same result as ``parse_lists_long``, written as one comprehension."""
    return [[float(j) for j in i.strip().strip('[').split()] for i in text.split(']')[:-1]]


if __name__ == "__main__":
    # "with" closes the file even if reading raises; the handle is not
    # called "file" because that name shadows a Python 2 builtin.
    with open('database.txt', 'r') as fh:
        text = fh.read()

    ## long version
    lists = parse_lists_long(text)
    print(lists)  # result is list of lists

    ## short version
    lists = parse_lists_short(text)
    print(lists)