读取txt文件并使用它的某些特定部分来获取数组

时间:2017-05-18 17:44:53

标签: python numpy

我正在尝试读取一个混合了字符串和浮点数的txt文件,内容像这样:

n_rows=55;    #This describes the mask array below, not the experiment!!
n_cols=32;
# Note that 'columns' run down and rows run across!

mask = [
/*RC1   0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 */
/* 0 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,
/* 1 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 2 */ 1,0,0,1,1,1,1,0,0,0, 0,0,0,0,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,1,0,0, 1,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,
/* 3 */ 0,0,0,1,1,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,1, 0,0,0,0,1,1,1,1,1,0, 1,0,1,1,1,
/* 4 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 5 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,1,
/* 6 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0, 0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,
/* 7 */ 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 1,1,0,0,1,
/*RC2   0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 5 6 7 8 9  0 1 2 3 4 */

我唯一想要的是 /* n */ 之后的数字,这样最终可以得到一个由1和0组成的矩阵。总共有32行(示例文件只显示了8行),它们中间还夹杂着无用的行。

我尝试了一些非常愚蠢的方法:

# Read the whole file up front.
txtlines = tuple(open(filename, 'r'))   # iterating the file object yields one string per line, so each line becomes one element
txtlines=list(txtlines)

import re
# NOTE(review): '*' is not escaped, so "/*" means "zero or more '/' characters",
# and [0-31] is the character class {0, 1, 2, 3}, not the numbers 0..31.
# Since pattern.match() only anchors at the start of the string, this pattern
# matches any line that begins with '/', including the "/*RC1 ..." header rows.
pattern = re.compile("/*[0-31]*/")     # intended to identify the unwanted lines


# Lines for which the pattern does not match at the start of the line.
gen = [i for i in txtlines if pattern.match(i)==None] # treated as the useless elements
# Keep every line of txtlines that is not one of the elements collected in gen.
lines_cut = [x for x in txtlines if x not in gen]

我打算之后再去掉 '/* n */',把每个元素转换成形如 [0,1,0,1,0,0,0,...] 的一维数组,再把它们全部拼接成一个二维数组。

目前有两个问题:

  1. 我没能通过该模式成功去掉所有无用元素,例如 /*RC2 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 */ 这样的行仍然残留。

  2. 从这些行中去掉 gen 之后,剩余元素的顺序完全变了:原本第一个元素是 /* 0 */,但现在变成了 /* 25 */。而我确实需要保持原来的顺序。

  3. 我通过先把列表转换成数组、再进行删除,算是解决了问题2:

    # Convert both lists to numpy arrays, then keep each element of `array`
    # for which the `not in gen_array` membership test is true.
    array=np.asarray(txtlines)
    gen_array=np.asarray(gen)
    array_cut=[x for x in array if x not in gen_array] 
    

    这似乎有效,但我不确定我是否做了正确的事。

3 个答案:

答案 0 :(得分:1)

您的正则表达式不正确。你需要转义 '*',并且应该用 [0-9]+(即一个或多个数字)来代替 [0-31]。例如:

import re
import numpy as np

def get_line(filename):
    """Yield the values of every ``/* n */`` data row in a mask file.

    Only lines that begin with a numeric row label such as ``/* 0 */`` are
    used; header rows like ``/*RC1 ... */`` and all other lines are skipped.

    Args:
        filename: Path of the text file to read.

    Yields:
        list[str]: The comma-separated tokens that follow the row label, with
        surrounding whitespace removed from each token (e.g. ``['0', '1']``).
        A row with nothing after its label yields an empty list.
    """
    # Raw string: '\*' is not a valid escape sequence in a normal string literal.
    pattern = re.compile(r'^/\* *[0-9]+ *\*/(.*)')
    with open(filename, 'r') as file:
        for line in file:
            m = pattern.match(line)
            if not m:
                continue
            # Drop the separators left at both ends (leading blank, trailing comma).
            payload = m.group(1).strip(', ')
            # Rows are written in groups ("0,0, 0,0"), so a token may carry a
            # leading blank that has to be removed individually.
            yield [token.strip() for token in payload.split(',')] if payload else []

# get_line() yields text tokens; convert them to int so the result is a
# numeric 0/1 matrix rather than a matrix of strings.
m = np.matrix([[int(value) for value in row] for row in get_line(filename)])

答案 1 :(得分:0)

# a: the raw comma-separated text that follows the "/* n */" label of each data row.
a = []
with open("tonparr.txt","r") as f:
    for line in f:
        # Split on the closing "*/" instead of slicing at a fixed column, so
        # row labels of any width (e.g. "/* 0 */" and "/* 10 */") are handled.
        label, closer, payload = line.partition("*/")
        # A data row's label is "/*" followed by a number only; header rows
        # such as "/*RC1 0 1 2 ... */" fail the isdigit() check and are skipped.
        if closer and label.startswith("/*") and label[2:].strip().isdigit():
            a.append(payload.strip())

# b: one list of ints per data row.
b = []
for payload in a:
    row = []
    for i in payload.split(","):
        # Values are written in groups ("0,0, 0,0"), so strip the blank that
        # precedes the first value of each group before testing it.
        i = i.strip()
        if i.isdigit():
            row.append(int(i))
    b.append(row)

这会生成一个二维列表,其中每一行都是一个int列表;接下来你只需要把它转换为numpy数组即可。如果我误解了这个问题,抱歉。

答案 2 :(得分:0)

# Collect, for every line starting with '/* ', the text to the right of the
# first '*/' with all blanks removed, then print one collected row per line.
with open('data.txt') as data:
    result = [
        text.split('*/')[1].strip().replace(' ', '')
        for text in data
        if text.startswith('/* ')
    ]
for row in result:
    print (row)

您要找的是以 '/* ' 开头的行。找到这样的行后,以 '*/' 为分隔符将其拆分,并把右边那一段去掉空白后保存到 `result` 列表中。