我正在尝试使用 Mike Robins 发布的代码,但它在 Python 3.7 中运行失败了。
请问该如何解决这个问题?谢谢。
How to parse complex text files using Python?
import re
import pandas as pd
# Compiled regular expressions for each recognised line format, keyed by
# the identifier used to tell the formats apart:
#   school  -> "School = <name>"          (group: school)
#   grade   -> "Grade = <digits>"         (group: grade)
#   student -> "Student number, <word>"   (group: info)
#   data    -> "<digits>, <rest of line>" (groups: number, value)
parse_re = {
    label: re.compile(pattern)
    for label, pattern in (
        ('school', r'School = (?P<school>.*)$'),
        ('grade', r'Grade = (?P<grade>\d+)'),
        ('student', r'Student number, (?P<info>\w+)'),
        ('data', r'(?P<number>\d+), (?P<value>.*)$'),
    )
}
def parse(line, patterns=None):
    """Identify which known line format ``line`` matches.

    The patterns are tried in the mapping's iteration order and the first
    one whose ``search`` succeeds wins; later patterns are not evaluated.

    Args:
        line: The text line to classify.
        patterns: Optional mapping of identifier -> compiled regex.
            Defaults to the module-level ``parse_re`` table.

    Returns:
        A ``(identifier, match)`` tuple for the first matching pattern, or
        ``(None, None)`` when nothing matches, so the result can always be
        unpacked into two names.
    """
    # Resolve the default at call time so the module-level table is used
    # as it exists when parse() is invoked.
    if patterns is None:
        patterns = parse_re
    # A plain loop replaces the Python 2-only
    # ``reduce(lambda (i, m), (id, rx): ...)`` construct: tuple-unpacking
    # lambda parameters and the ``reduce`` builtin do not exist in Python 3.
    for key, rx in patterns.items():
        match = rx.search(line)
        if match is not None:
            return key, match
    return None, None
# Read 'sample.txt' line by line, classify each line with parse(), and
# build one flat record per student score. The School and Grade values
# persist in ``record`` across lines, so every appended copy carries the
# most recently seen school and grade.
results = []
with open('sample.txt') as f:
    record = {}
    # Initialise the parser state up front so a data line that appears
    # before any "Grade" or "Student number" header cannot raise NameError.
    names = {}   # student number -> student name, reset on each Grade line
    info = None  # which column the current data lines carry: 'Name' or 'Score'
    for line in f:
        # ``key`` rather than ``id``: avoid shadowing the builtin.
        key, match = parse(line)
        if match is None:
            continue  # blank or unrecognised line
        if key == 'school':
            record['School'] = match.group('school')
        elif key == 'grade':
            record['Grade'] = int(match.group('grade'))
            names = {}  # names is a number indexed dictionary of student names
        elif key == 'student':
            info = match.group('info')
        elif key == 'data':
            number = int(match.group('number'))
            value = match.group('value')
            if info == 'Name':
                names[number] = value
            elif info == 'Score':
                record['Student number'] = number
                # Raises KeyError if a score refers to a student number
                # that had no preceding Name line in the current grade.
                record['Name'] = names[number]
                record['Score'] = int(value)
                # Append a copy: ``record`` itself is reused and mutated
                # for the next student.
                results.append(record.copy())
df = pd.DataFrame(results, columns=['School', 'Grade', 'Student number', 'Name', 'Score'])
# print is a function in Python 3; the statement form ``print df`` is a
# SyntaxError there.
print(df)