我正在尝试读取一个混合了字符串和浮点数的 txt 文件,内容如下:
n_rows=55; #This describes the mask array below, not the experiment!!
n_cols=32;
# Note that 'columns' run down and rows run across!
mask = [
/*RC1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 */
/* 0 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,
/* 1 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 2 */ 1,0,0,1,1,1,1,0,0,0, 0,0,0,0,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,1,0,0, 1,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,
/* 3 */ 0,0,0,1,1,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,1, 0,0,0,0,1,1,1,1,1,0, 1,0,1,1,1,
/* 4 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 5 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,
/* 6 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0, 0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 7 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 1,1,0,0,1,
/*RC2 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 */
我只需要 /* n */ 之后的数字,以便最终得到一个由 1 和 0 组成的矩阵。总共有 32 行(示例文件只显示了 8 行),它们之间还夹杂着一些无用的行。
我尝试了一些非常愚蠢的方法:
# Read the whole file; each line of the text file becomes one element.
txtlines = tuple(open(filename, 'r'))
txtlines=list(txtlines)
import re
# NOTE(review): the '*' characters are not escaped, so they act as quantifiers,
# and "[0-31]" is a character class (the characters 0-3 and 1), not the range
# 0..31. With match() this pattern succeeds on any line that starts with '/'.
pattern = re.compile("/*[0-31]*/") # pattern intended to pick out the "/* n */" rows
gen = [i for i in txtlines if pattern.match(i)==None] # lines the pattern does not match (the unwanted ones)
lines_cut = [x for x in txtlines if x not in gen] # remaining lines, i.e. those the pattern matched
我打算稍后再去掉 '/* n */' 前缀,把每个元素转换成形如 [0,1,0,1,0,0,0,...] 的一维数组,再把它们全部拼接成一个二维数组。
目前有两个问题:
该模式没有成功去除所有无用的元素,例如 /*RC2 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 */ 这一行仍然保留了下来。
从 txtlines 中去掉 gen 之后,剩余元素的顺序完全变了:原本第一个元素是 /* 0 */,现在却变成了 /* 25 */。但我确实需要保持原有的顺序。
我通过先把列表转换成数组、再进行删除,算是解决了问题 2:
# Convert both lists (all lines, and the lines to discard) to NumPy arrays.
array=np.asarray(txtlines)
gen_array=np.asarray(gen)
# Keep, in iteration order, every element of `array` that is not in `gen_array`.
array_cut=[x for x in array if x not in gen_array]
这似乎有效,但我不确定我是否做了正确的事。
答案 0 :(得分:1)
您的正则表达式不正确。您需要转义 '*',并且应该使用 [0-9]+(即一个或多个数字)而不是 [0-31]。例如:
import re
import numpy as np
def get_line(filename):
    """Yield each mask row of ``filename`` as a list of ints.

    Only lines that begin with a numeric row label such as ``/* 12 */`` are
    used; header lines like ``/*RC1 ... */`` and all other lines are skipped.

    Args:
        filename: Path of the text file to read.

    Yields:
        list[int]: The comma-separated values that follow the row label.

    Raises:
        ValueError: If a value after the row label is not an integer.
    """
    # Raw string: '\*' inside a normal string literal is an invalid escape.
    pattern = re.compile(r'^/\* *[0-9]+ *\*/(.*)')
    with open(filename, 'r') as file:
        for line in file:
            m = pattern.match(line)
            if m:
                # The trailing comma and the blanks between column groups
                # would otherwise leave empty or space-padded tokens, so
                # skip blank tokens and let int() absorb the padding.
                yield [
                    int(tok)
                    for tok in m.group(1).split(',')
                    if tok.strip()
                ]
# Collect every row produced by get_line() and stack them into one matrix.
m = np.matrix(list(get_line(filename)))
答案 1 :(得分:0)
# a: the text following the "/* n */" label of every data row.
a = []
with open("tonparr.txt", "r") as f:
    for line in f:
        if line.startswith("/* "):
            # Split on the comment terminator instead of slicing at a fixed
            # column, so multi-digit labels ("/* 10 */") and a final line
            # without a trailing newline are handled correctly.
            _, _, values = line.partition("*/")
            a.append(values.strip())

# b: one list of ints per data row.
b = []
for row_text in a:
    # Strip each token before testing it: values that follow the blank
    # between column groups (" 0") would otherwise fail isdigit() and be
    # silently dropped.
    b.append([
        int(tok)
        for tok in (piece.strip() for piece in row_text.split(","))
        if tok.isdigit()
    ])
这会生成一个二维列表,其中每一行都是一个 int 列表;之后您只需把它转换为 numpy 数组即可。如果我误解了问题,请见谅。
答案 2 :(得分:0)
# Gather the part after "*/" of every line that opens with "/* ",
# with all blanks removed.
result = []
with open('data.txt') as data:
    result.extend(
        line.split('*/')[1].strip().replace(' ', '')
        for line in data
        if line.startswith('/* ')
    )

# Show the cleaned rows, one per line.
for entry in result:
    print(entry)
您要找的是以 '/* ' 开头的行。找到这样的行后,在 '*/' 处将其拆分,并把右边的部分去掉空白后保存到 `result` 列表中。