我是 Python 新手,正在学习一些在线课程。我正在尝试将一些数据从段落格式转换为 CSV 格式(如下所示)。我已经能够导入包含段落格式的文本文件并将其导出为 CSV,但段落中的每一行在导入电子表格后都各自成为单独的一行。
import csv
import glob
import os
# Python 2 script (uses raw_input and binary file modes for the csv module).
# Asks for an input and an output folder, then writes one .csv file per
# .txt file found in the input folder.
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
for txt_file in glob.glob(os.path.join(directory, '*.txt')):
    # Same base name as the source file, with the extension swapped to .csv.
    stem = os.path.splitext(os.path.basename(txt_file))[0]
    csv_path = os.path.join(output, stem + '.csv')
    with open(txt_file, "rb") as src, open(csv_path, 'wb') as dst:
        # The reader splits every input line on '=', and each input line
        # is written out as its own CSV row.
        csv.writer(dst).writerows(csv.reader(src, delimiter='='))
我不知道如何解析数据,以便将标签和空格与数值分开,并将每个段落的各部分合并成一行,同时带上 CSV 文件所需的引号和逗号。任何帮助都将不胜感激!
段落格式:
12-03-06 15:19:36
FLOW: 1.17365 g/m
POS: +9273x1Gal
12-03-06 15:19:37
FLOW: 1.17849 g/m
POS: +9283x1Gal
12-03-06 15:19:38
FLOW: 1.19849 g/m
POS: +9293x1Gal
(重复)
所需的CSV输出(注意,我必须在+之前添加单引号以允许正确导入文本到电子表格中,否则它会以0形式输入)
"12-03-06 15:19:36","FLOW:","1.17365","g/m","POS:","'+","9273","x1","Gal"
"12-03-06 15:19:37","FLOW:","1.17849","g/m","POS:","'+","9283","x1","Gal"
"12-03-06 15:19:38","FLOW:","1.19849","g/m","POS:","'+","9293","x1","Gal"
答案 0 :(得分:0)
我建议您使用 collections.deque 一次处理三行,并用 re.match 解析您想要的项目:
# -*- coding: utf-8 -*-
from collections import deque
import csv
from functools import partial
import glob
import os
import re
import sys
# Select the prompt function and the CSV-output opener for the running
# interpreter, so the rest of the script is identical on Python 2 and 3.
if sys.version_info[0] >= 3:
    # Python 3.x: the csv module writes text; newline="" leaves line-ending
    # handling to the csv writer.
    inp = input
    open_csv_write = partial(open, mode="w", newline="")
else:
    # Python 2.x: the csv module expects a file opened in binary mode.
    inp = raw_input
    open_csv_write = partial(open, mode="wb")
# Matches a position line such as "POS: +92.73x1Gal" (case-insensitive) and
# captures, in order: the "POS:" label, the sign, the integer or decimal
# number, the "x<digits>" multiplier and the trailing unit word.
# Written as a raw string so that \d, \. and \w reach the regex engine
# unchanged instead of being treated as (invalid) string escape sequences.
POS_REG = re.compile(r"(POS:) ([+-])(\d+(?:\.\d+)?)(x\d+)(\w+)", re.I)
def change_ext(fn, new_ext):
    r"""
    Return `fn` with its extension replaced by `new_ext`.

    Given `fn` as "path\filename.old_ext",
    return "path\filename" + new_ext

    `new_ext` is appended as-is, so it should include the leading dot
    (e.g. ".csv"). If `fn` has no extension, `new_ext` is simply appended.
    """
    # The docstring is a raw string so the "\f" in the example path is not
    # interpreted as a form-feed character.
    root, _old_ext = os.path.splitext(fn)
    return root + new_ext
def get_pos(line, reg=POS_REG):
    """
    Split a position line into its captured fields.

    `reg` is matched against the start of `line`; with the default
    pattern, a string like "POS: +92.73x1Gal" yields
    ['POS:', '+', '92.73', 'x1', 'Gal'].

    Returns the captured groups as a list, or an empty list when the
    line does not match.
    """
    found = reg.match(line)
    if not found:
        return []
    return list(found.groups())
def process(inf, outcsv):
    """
    Turn consecutive three-line groups from `inf` into rows on `outcsv`.

    `inf` is a line iterator (next() is called on it, e.g. an open text
    file); `outcsv` is any object with a writerow() method, such as a
    csv.writer.

    A sliding window holds the last three lines read. Whenever the
    middle line starts with 'FLOW:' and the newest line parses with
    get_pos(), one row is written: the oldest line, the whitespace-split
    middle line, then the fields returned by get_pos().
    """
    # Sliding window over the three most recent lines (trailing whitespace
    # stripped); the oldest entry drops out automatically on append.
    window = deque(maxlen=3)

    # Prime the window with the first two lines; a missing line counts as ''.
    for _ in range(2):
        window.append(next(inf, '').rstrip())

    for raw in inf:
        window.append(raw.rstrip())
        first, middle, _latest = window
        if not middle.startswith('FLOW:'):
            continue
        fields = get_pos(raw)
        if fields:
            outcsv.writerow([first] + middle.split() + fields)
def main():
    """
    Prompt for an input and an output directory, then convert every
    '*.txt' file in the input directory into a CSV file with the same
    base name (extension '.csv') in the output directory.
    """
    in_dir = inp("Input directory: ")
    out_dir = inp("Output directory: ")

    # glob.glob returns the full list of matches before any file is written.
    for src_path in glob.glob(os.path.join(in_dir, '*.txt')):
        csv_name = change_ext(os.path.basename(src_path), ".csv")
        dst_path = os.path.join(out_dir, csv_name)
        with open(src_path) as inf, open_csv_write(dst_path) as outf:
            process(inf, csv.writer(outf))


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()