Question

我正在编写一个Python程序来为我的大学生成考勤报告，这是我到目前为止所做的：

--> Load the input workbook
--> Do some manipulation to it and save it as 'Output.xlsx'
--> Read the 'Output.xlsx' for next iteration    ###Here the problem occurs
--> Do some more manipulation based on input workbook and Output.xlsx
--> Save it as 'TIMESTAMP HERE.xlsx'

当OpenPyxl保存＆＃39; Output.xlsx＆＃39;时，它似乎压缩了它，当我在MS Excel中打开它时，似乎解压缩并重新应用所有格式。所以，当我打开输出.xxx＆＃39;对于第二次迭代，需要永远加载。我尝试使用：

output = openpyxl.load_workbook('Output.xlsx',use_iterators = True)

然后加载速度非常快，但我不能再写了。

有没有办法像use_iterables param那样快速加载工作簿？有没有替代方法呢？

如果你需要看一下我的代码：

import openpyxl,datetime
workbookname = raw_input('Enter the workbook name: ')
outputsheetname = workbookname
workbook = openpyxl.load_workbook(workbookname)
allsheets = workbook.get_sheet_names()
print(allsheets)
weeklysheetname = [x for x in allsheets if 'WEEKLY' in x]
for sheetname in allsheets:
    if not sheetname.startswith('I'):
        sheet = workbook.get_sheet_by_name(sheetname)
        count = 0
        markedupto = 0
        highestdatarow = sheet.get_highest_row()
        if 'WEEKLY' in sheetname:
            break
        if '-' not in sheetname:
            for each in sheet.iter_rows('D7:CD7'):
                for cell in each:
                    if(cell.value != None):
                        markedupto += 1
        else:
            for each in sheet.iter_rows('D7:W7'):
                for cell in each:
                    if(cell.value != None):
                        markedupto += 1
        print markedupto
        if '-' not in sheetname:
            for each in range(10,highestdatarow):
                for each in sheet.iter_rows('D'+str(each)+':CD'+str(each)):
                    for cell in each:
                        if cell.value not in ['a','A']:
                            cell.value = None
        else:
            for each in range(10,highestdatarow):
                for each in sheet.iter_rows('D'+str(each)+':W'+str(each)):
                    for cell in each:
                        if cell.value not in ['a','A']:
                            cell.value = None
        for each in range(10,highestdatarow):
            print('at highest row')
            curmarked = 1
            numcells = 0
            for each in sheet.iter_rows('D'+str(each)+':CD'+str(each)):
                for cell in each:

                    if(cell.value == None):
                        cell.value = curmarked
                        curmarked+= 1
                        numcells += 1
                    if(cell.value in ['a','A']):
                        numcells += 1
                    if(numcells == markedupto):

                        break
        if '-' not in sheetname:
            sheet['CE9'] = markedupto
        else:
            sheet['X9'] = 3*markedupto
print('saving')
workbook.save('Output.xlsx')
print('saved')
sheet = workbook.get_sheet_by_name(weeklysheetname[0])
fromDate = sheet.cell('F5').value
print('fromDate: ',fromDate)
toDate = sheet.cell('H5').value
print('toDate: ',toDate)
output = openpyxl.load_workbook('Output.xlsx')
print('loaded')
sheets = [x for x in output.get_sheet_names() if not (x.startswith('I') or x.endswith('I'))]
print(sheets)
def generate_in_weekly(sheetname):
    subdata = []

    sheet = output.get_sheet_by_name(sheetname)
    number_of_periods = 0
    for each in sheet.iter_rows('D8:CD8'):
        for cell in each:
            if cell.value != None:
                number_of_periods += 1
            else:
                break
    print('Number of periods: ' + str(number_of_periods))
    start_column = 'D8'
    end_column = chr(ord('D') + number_of_periods -1) + '8'
    print('end column' ,end_column)
    print(toDate,fromDate)
    start_cell = ''
    end_cell = ''
    for each in sheet.iter_rows(start_column+':'+end_column):
        for cell in each:
            if(cell.value == fromDate):
                start_cell = cell.column+str(cell.row)
    for each in list(sheet.iter_rows(start_column+':'+end_column))[::-1]:
        for cell in each:
            if(cell.value == toDate):
                end_cell = cell.column+str(cell.row)

    print(fromDate,toDate)
    print(start_cell,end_cell)
    total_val = len(list(sheet.iter_rows(start_cell+':'+end_cell))[0])
    print(total_val)
    start_row = 10
    end_row = sheet.get_highest_row()

    start_col_alpha = start_cell[0:-1]
    end_col_alpha = end_cell[0:-1]
    print(start_col_alpha,end_col_alpha)
    final_values = []
    for student in sheet.iter_rows(start_col_alpha+str(start_row) +':' + end_col_alpha+str(end_row-1)):


        student_vals =  []
        for period in student:
            if isinstance(period.value,int):
                student_vals.append(period.value)
        print('student_values:',student_vals)
        if student_vals !=[]:
            final_values.append(max(student_vals) - min(student_vals) + 1)
        else:
            final_values.append(0)

    weeklysheet = output.get_sheet_by_name(weeklysheetname[0])
    cur_sub_column_alpha = ''
    for each in weeklysheet.iter_rows('E7:O7'):
        for cell in each:
            print('sheetname: ',sheetname,'cell: ',cell.value)
            if(cell.value == sheetname):
                print(cell.value)
                cur_sub_column_alpha = cell.column
                break
    start_row = 9
    print(cur_sub_column_alpha)
    print("Num students: " + str(len(final_values)))
    print(cur_sub_column_alpha+str(start_row) +':' + cur_sub_column_alpha+str(end_row-1))
    for each in weeklysheet.iter_rows(cur_sub_column_alpha+str(start_row) +':' + cur_sub_column_alpha+str(end_row-1)):
        for cell in each:


            if not str(cell.column+str(cell.row)).endswith('38'):
                cell.value = final_values.pop(0)
    weeklysheet[cur_sub_column_alpha+'8'] = total_val
def generate_for_split_labs(sheetname):
    subdata = []

    sheet = output.get_sheet_by_name(sheetname)
    number_of_periods = 0
    for each in sheet.iter_rows('D8:CD8'):
        for cell in each:
            if cell.value != None:
                number_of_periods += 1
            else:
                break
    print('Number of periods: ' + str(number_of_periods))
    start_column = 'D8'
    end_column = chr(ord('D') + number_of_periods -1) + '8'
    print(end_column)
    print(toDate,fromDate)
    start_cell = ''
    end_cell = ''
    for each in sheet.iter_rows(start_column+':'+end_column):
        for cell in each:
            if(cell.value == fromDate):
                start_cell = str(cell).split('.')[-1].strip('>')
    for each in list(sheet.iter_rows(start_column+':'+end_column))[::-1]:
        for cell in each:
            print(cell.value)
            if(cell.value == toDate):
                end_cell = str(cell).split('.')[-1].strip('>')

    print(fromDate,toDate)
    print(start_cell,end_cell)
    total_val = len(list(sheet.iter_rows(start_cell+':'+end_cell))[0])
    print(total_val)
    start_row = 10
    end_row = sheet.get_highest_row()

    start_col_alpha = start_cell[0:-1]
    end_col_alpha = end_cell[0:-1]
    print(start_col_alpha,end_col_alpha)
    final_values = []
    for student in sheet.iter_rows(start_col_alpha+str(start_row) +':' + end_col_alpha+str(end_row-1)):


        student_vals =  []
        for period in student:

            if isinstance(period.value,int):
                student_vals.append(period.value)
        print('student_values:',student_vals)
        if student_vals !=[]:
            final_values.append(max(student_vals) - min(student_vals) + 1)
        else:
            final_values.append(0)
    final_values = [3*x for x in final_values]
    print('Final Values:' ,final_values)
    weeklysheet = output.get_sheet_by_name(weeklysheetname[0])
    cur_sub_cell = ''
    cut_at_hyphen = sheetname.split('-')
    sheetname = cut_at_hyphen[0]
    reminant = cut_at_hyphen[1]
    for each in weeklysheet.iter_rows('E7:O7'):
        for cell in each:

            if(cell.value == sheetname):
                cur_sub_cell = str(cell).split('.')[-1].rstrip('>')
                break
    print(cur_sub_cell)
    cur_sub_column_alpha = cur_sub_cell[0:-1]
    if reminant == 'I':
        start_row = 9
        print(start_row,end_row)
        print("Num students: " + str(len(final_values)))
        end_row = len(final_values)+start_row-1
        print(start_row,end_row)
        for each in weeklysheet.iter_rows(cur_sub_column_alpha+str(start_row) +':' + cur_sub_column_alpha+str(end_row)):
            for cell in each:

                if str(cell).split('.')[-1].rstrip('>')[1:] != '38':
                    cell.value = final_values.pop(0)
        weeklysheet[cur_sub_column_alpha+'8'] = total_val*3
    elif reminant == 'II':
        start_row = 39
        print("Num students: " + str(len(final_values)))
        print(start_row,end_row)
        end_row = len(final_values)+start_row-1
        print(start_row,end_row)
        for each in weeklysheet.iter_rows(cur_sub_column_alpha+str(start_row) +':' + cur_sub_column_alpha+str(end_row)):
            for cell in each:
                print(cell)
                if str(cell).split('.')[-1].rstrip('>')[1:] != '38':
                    cell.value = final_values.pop(0)
        weeklysheet[cur_sub_column_alpha+str(start_row-1)] = total_val*3
for each in sheets:
    generate_in_weekly(each)
generate_for_split_labs('13A99304-I')
generate_for_split_labs('13A99304-II')
generate_for_split_labs('13A05303-I')
generate_for_split_labs('13A05303-II')
output.save(outputsheetname[:-5]+' '+str(datetime.datetime.now())[0:18].replace(':','-') +'.xlsx')

加载工作簿非常慢

0 个答案: