这是我的代码:
import openpyxl
import os
os.chdir('c:\\users\\Desktop')
wb= openpyxl.load_workbook(filename= 'excel.xlsx',data_only = True)
wb.create_sheet(index=0,title='Summary')
sumsheet= wb.get_sheet_by_name('Summary')
print('Creating Summary Sheet')
#loop through worksheets
print('Looping Worksheets')
for sheet in wb.worksheets:
for row in sheet.iter_rows():
for cell in row:
#find headers of columns needed
if cell.value=='LowLimit':
lowCol=cell.column
if cell.value=='HighLimit':
highCol=cell.column
if cell.value=='MeasValue':
measCol=cell.column
#name new columns
sheet['O1']='meas-low'
sheet['P1']='high-meas'
sheet['Q1']='Minimum'
sheet['R1']='Margin'
#find how many rows of each sheet
maxrow=sheet.max_row
i=0
#subtraction using max row
for i in range(2,maxrow+1):
if sheet[str(highCol)+str(i)].value=='---':
sheet['O'+str(i)]='='+str(measCol)+str(i)+'-'+str(lowCol)+str(i)
sheet['P'+str(i)]='=9999'
sheet['Q'+str(i)]='=MIN(O'+str(i)+':P'+str(i)+')'
sheet['R'+str(i)]='=IF(AND(Q'+str(i)+'<3,Q'+str(i)+'>-3),"Marginal","")'
elif sheet[str(lowCol)+str(i)].value=='---':
sheet['O'+str(i)]='=9999'
sheet['P'+str(i)]='='+str(highCol)+str(i)+'-'+str(measCol)+str(i)
sheet['Q'+str(i)]='=MIN(O'+str(i)+':P'+str(i)+')'
sheet['R'+str(i)]='=IF(AND(Q'+str(i)+'<3,Q'+str(i)+'>-3),"Marginal","")'
else:
sheet['O'+str(i)]='='+str(measCol)+str(i)+'-'+str(lowCol)+str(i)
sheet['P'+str(i)]='='+str(highCol)+str(i)+'-'+str(measCol)+str(i)
sheet['Q'+str(i)]='=MIN(O'+str(i)+':P'+str(i)+')'
sheet['R'+str(i)]='=IF(AND(Q'+str(i)+'<3,Q'+str(i)+'>-3),"Marginal","")'
++i
print('Saving new wb')
import os
os.chdir('C:\\Users\\hpj683\\Desktop')
wb.save('example.xlsx')
这完全没问题,只需要4分钟即可完成一个excel工作簿。有什么办法可以优化我的代码以使其运行更快?我在网上的研究建议改为read_only或write_only以使其运行得更快,但是我的代码需要读取和写入excel工作簿,所以这些都不起作用。
答案 0 :(得分:0)
代码可以从分解为单独的函数中受益。这将帮助您识别慢速位并逐位替换它们。
以下位不应该在每一行的循环中:
ws.max_row
这是非常昂贵的ws["C" + str(i)]
。使用ws.cell(row=i, column=3)
如果嵌套循环不是格式化错误,那么为什么嵌套?
此外,您应该查看配置文件模块以找出速度慢的内容。您可能希望从去年的PyCon UK获得watch my talk on profiling openpyxl。
祝你好运!