例如:一名学生有三门科目。subj 1 的成绩为“A”,subj 2 的成绩为空(“”),subj 3 的成绩为“c”。
这里 subj 2 没有成绩,其值为空,但程序却把 subj 3 的值“c”当作了 subj 2 的值……
import tabula
def readpdf(pdf_path="/tmp/university_exam_results.pdf",
            debug_registration_number="953413114041"):
    """Extract per-student subject grades from an exam-results PDF.

    Every table that tabula finds in the PDF is walked row by row.  A table
    whose first row is a subject-code header (see ``_is_subject_header``)
    replaces the current list of subject codes; all other rows are read as
    student rows: registration number, student name, then one grade per
    subject code, matched purely by cell position.

    NOTE(review): because grades are matched to subject codes by position,
    this relies on tabula emitting a cell (with empty text) for every blank
    grade.  If the extractor drops blank cells, the following grades shift
    one subject to the left.  Passing ``lattice=True`` to ``read_pdf`` or
    aligning cells by their ``left`` coordinate may avoid that -- confirm
    against the actual PDF.

    Args:
        pdf_path: Path of the PDF handed to ``tabula.read_pdf``.
        debug_registration_number: Registration number whose parsed data is
            printed to stdout as a debugging trace; pass ``None`` to
            disable the trace.

    Returns:
        A list of ``(0, 0, values)`` tuples, one per non-empty grade, where
        ``values`` is a dict with the keys ``registration_number``,
        ``student_name``, ``subject_code`` and ``grade``.
        NOTE(review): the ``(0, 0, values)`` shape looks like an ORM
        "create" command -- confirm with the caller.

    Raises:
        IndexError: If a row holds more grade cells than there are known
            subject codes.
    """
    tables = tabula.read_pdf(pdf_path, output_format="json", pages="all")
    grades_by_student = _collect_student_grades(tables)
    return _build_performance_records(grades_by_student,
                                      debug_registration_number)


def _is_subject_header(row):
    """Return True when a table's first row is the subject-code header."""
    if row and row[0]['text'] == '':
        return True
    return len(row) > 1 and row[1]['text'] == "Subject Code - >"


def _store_student_row(row, subject_codes, grades_by_student):
    """Record one student row (registration number, name, grades...)."""
    registration_number = None
    for col, cell in enumerate(row):
        text = cell['text']
        if col == 0:
            registration_number = text
            # Keep grades already collected for this student on another
            # table/page instead of overwriting them.
            grades_by_student.setdefault(registration_number, {})
        elif col == 1:
            grades_by_student[registration_number]['name'] = text
        else:
            # Positional match: the third cell belongs to the first code.
            code = subject_codes[col - 2]
            grades_by_student[registration_number][code] = text


def _collect_student_grades(tables):
    """Map registration number -> {'name': ..., subject_code: grade, ...}."""
    grades_by_student = {}
    # Deliberately kept across tables: a table without its own header row
    # reuses the subject codes of the most recent header.
    subject_codes = []
    for table in tables:
        has_header = False
        codes_split = False
        for row_number, row in enumerate(table['data'], start=1):
            if row_number == 1:
                has_header = _is_subject_header(row)
                if has_header:
                    subject_codes = [cell['text'] for cell in row[2:]]
                    continue
            elif has_header and row_number == 2:
                # An empty first cell means the subject codes wrapped onto
                # a second line; glue each continuation onto its code.
                # Otherwise this is the "Reg. Number" / "Stud. Name" label
                # row, which carries no data.
                codes_split = bool(row) and row[0]['text'] == ''
                if codes_split:
                    for index, cell in enumerate(row[2:]):
                        subject_codes[index] += cell['text']
                continue
            elif has_header and codes_split and row_number == 3:
                # With wrapped codes the label row is pushed down to row 3.
                continue
            _store_student_row(row, subject_codes, grades_by_student)
    return grades_by_student


def _build_performance_records(grades_by_student,
                               debug_registration_number=None):
    """Flatten the per-student grade mapping into one record per grade."""
    records = []
    students = enumerate(grades_by_student.items(), start=1)
    for position, (registration_number, details) in students:
        trace = registration_number == debug_registration_number
        # Each print gets one pre-formatted string so the output is the
        # same under Python 2 and Python 3.
        if trace:
            print("---------------------------------------------------------------------------")
            print("%s %s" % (position, registration_number))
            print("---------------------------------------------------------------------------")
            print("%s %s" % (details, "--------------------------------------------------------------------------"))
        # A row with a single cell never sets a name; it has no grades
        # either, so a placeholder is enough to avoid a KeyError.
        student_name = details.get('name', '')
        for subject_code, grade in details.items():
            if trace:
                print("%s : %s" % (subject_code, grade))
            # 'name' is bookkeeping, not a subject; blank grades are
            # skipped.
            if subject_code == 'name' or grade == '':
                continue
            records.append((0, 0, {
                'registration_number': registration_number,
                'student_name': student_name,
                'subject_code': subject_code,
                'grade': grade,
            }))
        # NOTE(review): the source indentation was lost, so whether this
        # closing separator was meant for every student or only the traced
        # one is an assumption -- confirm.
        if trace:
            print("------------------------------------------------------------------------")
    return records