Question

我正在尝试读取一个混合了字符串和浮点数的 txt 文件，内容如下：

n_rows=55;    #This describes the mask array below, not the experiment!!
n_cols=32;
# Note that 'columns' run down and rows run across!

mask = [
/*RC1   0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 */
/* 0 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,
/* 1 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 2 */ 1,0,0,1,1,1,1,0,0,0, 0,0,0,0,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,1,0,0, 1,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,
/* 3 */ 0,0,0,1,1,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,1, 0,0,0,0,1,1,1,1,1,0, 1,0,1,1,1,
/* 4 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 5 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,
/* 6 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0, 0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 7 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 1,1,0,0,1,
/*RC2   0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 */

我唯一想要的是 /* n */ 之后的数字，这样最终可以得到一个由 1 和 0 组成的矩阵。总共有 32 行（示例文件只显示了 8 行），它们之间还夹杂着一些无用的行。

我尝试了一些非常愚蠢的方法：

# Iterating the open file yields one string per line, so every line of the
# txt file becomes one element; the tuple is then turned into a list.
txtlines = tuple(open(filename, 'r'))   #read files so that each whole line in txt file become an element of a list)
txtlines=list(txtlines)

import re
# NOTE(review): the '*' characters in this pattern are unescaped, so the regex
# engine treats them as quantifiers, and [0-31] is a character class (the
# range 0-3 plus '1'), not "the numbers 0 to 31".
pattern = re.compile("/*[0-31]*/")     #set a pattern to remove unwanted lines


# gen collects the lines the pattern does NOT match at their start.
gen = [i for i in txtlines if pattern.match(i)==None] # The useless element
# lines_cut keeps the lines of txtlines that are not contained in gen.
lines_cut = [x for x in txtlines if x not in gen]

我打算稍后切掉 '/* n */'，把每个元素转换成形如 [0,1,0,1,0,0,0,...] 的一维数组，再把它们全部拼接成二维数组。

目前有两个问题：

我没有通过该模式成功去掉所有无用的元素，例如 /*RC2 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 */ 这样的行仍然保留了下来。
从各行中剔除 gen 之后，剩余元素的顺序完全变了：最初第一个元素是 /* 0 */，但现在变成了 /* 25 */。而我确实需要保持原来的顺序。

我先把列表转换为数组，然后再进行删除，算是解决了问题 2：

# Convert both lists to arrays (np is presumably numpy; its import is not
# shown here), then keep each element of `array` that is not in `gen_array`.
array=np.asarray(txtlines)
gen_array=np.asarray(gen)
array_cut=[x for x in array if x not in gen_array]

这似乎有效，但我不确定我是否做了正确的事。

Answer 1

您的正则表达式不正确。你需要转义 '*'，并且应该用 [0-9]+（即一个或多个数字）来代替 [0-31]。例如：

import re
import numpy as np

def get_line(filename):
    """Yield the comma-separated values of every data row in ``filename``.

    A data row is a line that starts with a ``/* <digits> */`` label, e.g.
    ``/* 12 */ 0,1,0, 1,0,``.  Header rows such as ``/*RC1 0 1 2 ... */`` and
    all other lines are skipped.

    Args:
        filename: Path of the text file to read.

    Yields:
        list[str]: The tokens that follow the label, split on ``','``.
        Tokens are returned as strings exactly as they appear between the
        commas, so a token may carry a leading space (``' 0'``); ``int(tok)``
        accepts such tokens if numeric values are needed.
    """
    # Raw string: the backslashes must reach the regex engine untouched.
    # In a normal string literal '\*' is an invalid escape sequence, which
    # newer Python versions warn about.
    pattern = re.compile(r'^/\* *[0-9]+ *\*/(.*)')
    with open(filename, 'r') as file:
        for line in file:
            # Use the compiled pattern's own method instead of re.match().
            m = pattern.match(line)
            if m:
                # '.' does not match the newline, so group(1) is the rest of
                # the line; strip the leading blank and the trailing comma.
                yield m.group(1).strip(', ').split(',')

# Collect every row yielded by get_line into one matrix; `filename` is not
# defined in this snippet and must be set to the input file's path beforehand.
m = np.matrix([l for l in get_line(filename)])

Answer 2

# Step 1: collect the text that follows the "/* n */" label of each data row.
a = []
with open("tonparr.txt", "r") as f:
    for line in f:
        # Only data rows start with "/* "; header rows look like "/*RC1 ...".
        if line.startswith("/* "):
            # Cut at the closing "*/" instead of a fixed column: the label is
            # one character wider for two-digit row numbers, and a fixed
            # offset would leave a stray blank in front of the first value.
            # strip() also removes the newline without assuming there is one.
            a.append(line.partition("*/")[2].strip())

# Step 2: turn each row's text into a list of ints.
b = []
for row_text in a:
    row = []
    for token in row_text.split(","):
        # Groups are separated by ", ", so some tokens carry a leading blank;
        # strip it first, otherwise isdigit() rejects the token and the value
        # is silently dropped.
        token = token.strip()
        if token.isdigit():
            row.append(int(token))
    b.append(row)

这会把每一行解析成一个 int 列表，从而得到一个二维列表；之后你只需要把它转换成 numpy 数组即可。如果我误解了这个问题，请见谅。

Answer 3

# Gather the payload of each data row: lines opening with '/* ' are split on
# '*/', and the right-hand piece is kept with all blanks removed.
with open('data.txt') as data:
    result = [
        line.split('*/')[1].strip().replace(' ', '')
        for line in data
        if line.startswith('/* ')
    ]

# Show the cleaned rows, one per line.
for row in result:
    print(row)

您要找的是以 '/* ' 开头的行。找到这样的行后，以 '*/' 为分隔符将其拆分，并把右边那一部分去掉空白后保存到 `result` 列表中。

读取txt文件并使用它的某些特定部分来获取数组

3 个答案: