在python中读取多层csv文件

时间:2011-08-03 11:12:19

标签: python list file-io

我需要从文本文件中读取以下数据:

[L02]
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,w,g,g
g,g,g,g,g,g,g,g,w,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,g,g,g,g,g,g
g,g,g,g,g,g,w,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
[L01]
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d

我可以把单个块当作csv文件读取,但我不知道如何把每个块分别读取为单独的列表。

我想要的输出是:每个块对应一个数组/列表,块中的内容作为该列表的元素。有什么想法吗?

2 个答案:

答案 0 :(得分:3)

这是一个脚本,演示如何将问题分解为可重用的步骤(函数)并执行您需要的转换。

import itertools
import operator
import re
import csv
import pprint

class TaggedLine(str):
    """
    A ``str`` subclass whose instances also carry a ``tag`` attribute.

    The string value behaves exactly like ``val``; ``tag`` is stored
    unchanged on the instance and may be any object (including None).

    :param val: the text of the line.
    :param tag: arbitrary label to attach to the line.
    """
    def __new__(cls, val, tag):
        # str is immutable, so its value has to be supplied in __new__.
        # ``tag`` is accepted here only so the constructor signature
        # matches __init__; it is stored later.
        return str.__new__(cls, val)

    def __init__(self, val, tag):
        # Do not forward ``val``: str defines no __init__ of its own, so
        # this resolves to object.__init__, which rejects extra arguments
        # on Python 3 (TypeError) and warns on Python 2.6+.
        super(TaggedLine, self).__init__()
        self.tag = tag

def sections(stream):
    """
    Tag each line of the stream with its [section] (or None).

    Iterates over ``stream`` line by line. A line that starts with
    ``[name]`` is treated as a section header: it updates the current
    section name and is not yielded itself. Every other line is yielded
    as a ``TaggedLine`` whose ``tag`` is the most recently seen section
    name, or None if no header has been seen yet.

    :param stream: any iterable of text lines (e.g. an open file).
    :returns: a generator of ``TaggedLine`` objects.
    """
    # Raw string: in a plain literal '\[' is an invalid escape sequence,
    # which newer Python versions warn about. The pattern is unchanged.
    section_pattern = re.compile(r'\[(.*)\]')
    section = None
    for line in stream:
        # match() anchors at the start of the line only.
        matcher = section_pattern.match(line)
        if matcher:
            section = matcher.group(1)
            continue
        yield TaggedLine(line, section)

def splitter(stream):
    """
    Group the tagged lines of ``stream`` by their section tag.

    Returns the ``itertools.groupby`` iterator built over
    ``sections(stream)``, keyed on each line's ``tag`` attribute, so it
    produces ``(tag, lines_iterator)`` pairs for runs of consecutive
    lines that share a tag.
    """
    tagged_lines = sections(stream)
    tag_of = operator.attrgetter('tag')
    return itertools.groupby(tagged_lines, tag_of)

def parsed_sections(stream):
    """
    Yield ``(section, rows)`` for each group produced by ``splitter``.

    ``rows`` is the list of records obtained by running ``csv.reader``
    over the lines of that group.
    """
    for name, group in splitter(stream):
        # Materialize the rows before yielding: the group iterator from
        # groupby is no longer usable once the outer iterator advances.
        rows = [row for row in csv.reader(group)]
        yield name, rows

if __name__ == '__main__':
    # Demo: read 'data.csv' from the current directory and show the first
    # two parsed rows of every section.
    with open('data.csv') as stream:
        for section, data in parsed_sections(stream):
            # A single %-formatted argument prints the same text on both
            # Python 2 and Python 3 (the bare print statement is a
            # SyntaxError on Python 3).
            print('section %s' % section)
            pprint.pprint(data[:2])

将您的数据保存为“data.csv”,然后运行该脚本,将得到如下输出:

section L02
[['g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'w',
  'w',
  'w',
  'w',
  'g',
  'g'],
 ['g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'w',
  'w',
  'w',
  'w',
  'w',
  'g',
  'g']]
section L01
[['d',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd'],
 ['d',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd']]

答案 1 :(得分:1)

如果您有numpy,则可以将文件读入numpy数组。 comments='['告诉np.genfromtxt忽略以[开头的行。 reshape方法将每个16x16块放在其自己的“层”中。

import numpy as np
# Load the comma-separated file, treating everything from a '[' onward as a
# comment (so the "[L02]"-style header lines are skipped), then reshape the
# result into a stack of 16x16 layers; -1 lets numpy infer the layer count.
arr = np.genfromtxt(
    'data.csv',
    comments='[',
    delimiter=',',
    dtype=None,
).reshape(-1, 16, 16)

您可以使用arr[n]访问第n个图层。