将.xls转换为.xlsx,以便Openpyxl可以使用它

时间:2016-07-21 01:07:11

标签: python openpyxl xlrd

我写了一个烧瓶应用程序,用于处理一些excel文件,但是,我为.xlsx文件编写了它。输入的文件可能是.xls文件,我知道Openpyxl无法打开。如何在我的应用程序中使用Openpyxl处理文件之前将文件转换为.xlsx?

我在网上看到有关使用xlrd将原始.xls写入Openpyxl可以处理的.xlsx文件的内容,但我无法根据我的具体应用进行定制。

提前致谢!

from openpyxl import load_workbook
from openpyxl.styles import Style, Font
from flask import Flask, request, render_template, redirect, url_for, send_file
import os


app = Flask(__name__)

@app.route('/')
def index():
    return """<center><body bgcolor="#FACC2E">
          <h1><p>Automated TDX Report</h1></p><br>
    <form action="/upload" method=post enctype=multipart/form-data>
      <p><input type=file name=file>
         <input type=submit value=Upload>
    </form></center></body>"""

@app.route('/upload', methods = ['GET', 'POST'])
def upload():
    if request.method == 'POST':
        f = request.files['file']
        f.save(f.filename)
        return process(f.filename)

def process(filename):


    VP = ['ZYJD', 'ZYJC', 'ZYKC', 'ZYKA', 'ZYKB', 'ZYKD', 'ZYKE', 'ZYKF', 'ZYJB', 'ZYJX', 'ZYKG', 'ZYKH', 'ZYJE', 'ZYJA', 
         'ZYKI', 'ZYKX', 'ZYKK', 'ZYKJ', 'ZYJF', 'ZYJK', 'ZYJG', 'ZYJJ', 'ZYKL', 'ZYKM', 'ZYKN']

    VA = ['ZYIC', 'ZYIB', 'ZYHC', 'ZYIA', 'ZYHA', 'ZYHG', 'ZYHB', 'ZYID', 'ZYDA', 'ZYIE', 'ZYHD', 'ZYIG', 'ZYIX', 'ZYHE', 
         'ZYIF', 'ZYHX', 'ZYDE', 'ZYHF', 'ZYLB', 'ZYAC', 'ZYCF', 'ZYDF', 'ZYBG', 'ZYDG', 'ZYDD', 'ZYDH', 'ZYCB', 'ZYCA', 
         'ZYWA', 'ZYWB', 'ZYWC', 'ZYWD', 'ZYWE', 'ZYWF', 'ZYWG', 'ZYWI', 'ZYWJ']

    Gordon = ['ZYDB', 'ZYDX', 'ZYEB', 'ZYED', 'ZYEC', 'ZYEA', 'ZYEX', 'ZYFE', 'ZYFX', 'ZYFD', 'ZYFA', 'ZYFC', 'ZYFB', 
             'ZYGC', 'ZYGA', 'ZYGX', 'ZYGB', 'ZYGF', 'ZYGG', 'ZYGD', 'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD', 'ZYKM', 'ZYKN', 
             'ZYCC', 'ZYCE']

    Pete = ['ZYAD', 'ZYBX', 'ZYAX', 'ZYAB', 'ZYBC', 'ZYBA', 'ZYBB', 'ZYAA', 'ZYBD', 'ZYLE', 'ZYCX', 'ZYAE', 'ZYCC', 'ZYCE', 
           'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD']

    Mike = ['ZYKP', 'ZYAP', 'ZYHP', 'ZYJP', 'ZYFP', 'ZYJR', 'ZYCP', 'ZYIR', 'ZYAR', 'ZYBP', 'ZYKR', 'ZYJS', 'ZYIP', 'ZYHR',
           'ZYEP', 'ZYFF', 'ZYGP', 'ZYKS', 'ZYEE', 'ZYJH', 'ZYII', 'ZYHH', 'ZYJW']

    workbook = load_workbook(filename)
    worksheet = workbook.active

    worksheet.column_dimensions.group('B', hidden=True)
    worksheet.column_dimensions.group('D', hidden=True)
    worksheet.column_dimensions.group('E', hidden=True)
    worksheet.column_dimensions.group('F', hidden=True)
    worksheet.column_dimensions.group('G', hidden=True)
    worksheet.column_dimensions.group('H', hidden=True)
    worksheet.column_dimensions.group('I', hidden=True)
    worksheet.column_dimensions.group('K', hidden=True)
    worksheet.column_dimensions.group('L', hidden=True)
    worksheet.column_dimensions.group('M', hidden=True)
    worksheet.column_dimensions.group('N', hidden=True)
    worksheet.column_dimensions.group('O', hidden=True)
    worksheet.column_dimensions.group('P', hidden=True)
    worksheet.column_dimensions.group('Q', hidden=True)
    worksheet.column_dimensions.group('R', hidden=True)
    worksheet.column_dimensions.group('S', hidden=True)
    worksheet.column_dimensions.group('T', hidden=True)
    worksheet.column_dimensions.group('U', hidden=True)
    worksheet.column_dimensions.group('V', hidden=True)
    worksheet.column_dimensions.group('W', hidden=True)
    worksheet.column_dimensions.group('X', hidden=True)
    worksheet.column_dimensions.group('Y', hidden=True)
    worksheet.column_dimensions.group('Z', hidden=True)
    worksheet.column_dimensions.group('AA', hidden=True)
    worksheet.column_dimensions.group('AB', hidden=True)
    worksheet.column_dimensions.group('AC', hidden=True)
    worksheet.column_dimensions.group('AD', hidden=True)
    worksheet.column_dimensions.group('AE', hidden=True)
    worksheet.column_dimensions.group('AF', hidden=True)
    worksheet.column_dimensions.group('AG', hidden=True)
    worksheet.column_dimensions.group('AH', hidden=True)
    worksheet.column_dimensions.group('AI', hidden=True)
    worksheet.column_dimensions.group('AJ', hidden=True)
    worksheet.column_dimensions.group('AK', hidden=True)
    worksheet.column_dimensions.group('AM', hidden=True)
    worksheet.column_dimensions.group('AN', hidden=True)
    worksheet.column_dimensions.group('AP', hidden=True)
    worksheet.column_dimensions.group('AQ', hidden=True)
    worksheet.column_dimensions.group('AR', hidden=True)
    worksheet.column_dimensions.group('AS', hidden=True)
    worksheet.column_dimensions.group('AT', hidden=True)
    worksheet.column_dimensions.group('AU', hidden=True)
    worksheet.column_dimensions.group('AV', hidden=True)
    worksheet.column_dimensions.group('AW', hidden=True)
    worksheet.column_dimensions.group('AX', hidden=True)
    worksheet.column_dimensions.group('AY', hidden=True)
    worksheet.column_dimensions.group('AZ', hidden=True)
    worksheet.column_dimensions.group('BA', hidden=True)
    worksheet.column_dimensions.group('BB', hidden=True)
    worksheet.column_dimensions.group('BC', hidden=True)
    worksheet.column_dimensions.group('BD', hidden=True)
    worksheet.column_dimensions.group('BE', hidden=True)
    worksheet.column_dimensions.group('BF', hidden=True)
    worksheet.column_dimensions.group('BH', hidden=True)
    worksheet.column_dimensions.group('BI', hidden=True)
    worksheet.column_dimensions.group('BJ', hidden=True)
    worksheet.column_dimensions.group('BK', hidden=True)
    worksheet.column_dimensions.group('BL', hidden=True)
    worksheet.column_dimensions.group('BM', hidden=True)
    worksheet.column_dimensions.group('BN', hidden=True)
    worksheet.column_dimensions.group('BO', hidden=True)
    worksheet.column_dimensions.group('BP', hidden=True)
    worksheet.column_dimensions.group('BQ', hidden=True)
    worksheet.column_dimensions.group('BR', hidden=True)
    worksheet.column_dimensions.group('BS', hidden=True)
    worksheet.column_dimensions.group('BT', hidden=True)
    worksheet.column_dimensions.group('BU', hidden=True)
    worksheet.column_dimensions.group('BV', hidden=True)
    worksheet.column_dimensions.group('BW', hidden=True)
    worksheet.column_dimensions.group('BX', hidden=True)
    worksheet.column_dimensions.group('BY', hidden=True)
    worksheet.column_dimensions.group('BZ', hidden=True)
    worksheet.column_dimensions.group('CA', hidden=True)
    worksheet.column_dimensions.group('CB', hidden=True)
    worksheet.column_dimensions.group('CC', hidden=True)
    worksheet.column_dimensions.group('CD', hidden=True)
    worksheet.column_dimensions.group('CE', hidden=True)
    worksheet.column_dimensions.group('CF', hidden=True)
    worksheet.column_dimensions.group('CG', hidden=True)
    worksheet.column_dimensions.group('CH', hidden=True)
    worksheet.column_dimensions.group('CI', hidden=True)
    worksheet.column_dimensions.group('CJ', hidden=True)
    worksheet.column_dimensions.group('CK', hidden=True)
    worksheet.column_dimensions.group('CL', hidden=True)
    worksheet.column_dimensions.group('CM', hidden=True)
    worksheet.column_dimensions.group('CN', hidden=True)
    worksheet.column_dimensions.group('CO', hidden=True)
    worksheet.column_dimensions.group('CP', hidden=True)
    worksheet.column_dimensions.group('CQ', hidden=True)
    worksheet.column_dimensions.group('CR', hidden=True)
    worksheet.column_dimensions.group('CS', hidden=True)
    worksheet.column_dimensions.group('CU', hidden=True)


    routecolumn = worksheet.columns[58]
    i = 2
    supervisorheader = worksheet.cell("CV1")
    s = Style(font=Font(bold=True))
    supervisorheader.style = s

    worksheet['CV1'] = 'Supervisor'
    for route in routecolumn:
        if route.value == 'Responsible Route':
            continue
        if route.value in Gordon:
            pos = Gordon.index(route.value)
            worksheet['CV' + str(i)].value = 'Gordon'
            i += 1
        elif route.value in VA:
            pos = VA.index(route.value)
            worksheet['CV' + str(i)].value = 'Vinny A'
            i += 1
        elif route.value in VP:
            pos = VP.index(route.value)
            worksheet['CV' + str(i)].value = 'Vinny P'
            i += 1
        elif route.value in Pete:
            pos = Pete.index(route.value)
            worksheet['CV' + str(i)].value = 'Pete'
            i += 1
        elif route.value in Mike:
            pos = Mike.index(route.value)
            worksheet['CV' + str(i)].value = 'Mike'
            i += 1
        elif route.value not in Gordon or route.value not in VA or route.value not in VP or route.value not in Pete \
        or route.value not in Mike:
            worksheet['CV' + str(i)].value = 'Building'
            i += 1


    workbook.save(filename)
    newfilename = filename.strip(".xlsx")
    newfilename = newfilename + ".xls"
    os.rename('/home/MY NAME/'+filename, '/home/MY NAME/'+newfilename)

    return send_file(newfilename, attachment_filename='tdx.xls', as_attachment=True), os.remove ('/home/MY NAME/'+newfilename)



if __name__ == '__main__':
    app.run(debug = True, host = '0.0.0.0')

2 个答案:

答案 0 :(得分:1)

我的建议:使用xlrd读取您需要的值,并使用openpyxl创建新的xlsx工作簿。您只需要能够使用xlrd获取所有相关信息。

你唯一要做的就是从单元格中读取值,这就是选择&#34; active&#34;工作表。我在名为&#34; xlrd:获取活动表&#34;的帖子中找到this Google Groups post它表示尝试使用.sheet_selected属性或.sheet_visible属性,并使用以下代码检查哪些有效:

import xlrd
b = xlrd.open_workbook("vis.xls") # where vis.xls is your test file
for i, s in enumerate(b.sheets()):
    print(i, s.name, s.sheet_selected, s.sheet_visible)

它还注意到:

  

这两个属性记录在dict sheet.py的{​​{1}}中   从第2行开始。_WINDOW_2可能就是你想要的。   忽略将此位显示为未定义的MS文档。

答案 1 :(得分:1)

最好的办法是通过headless mode中的OpenOffice / LibreOffice管道XLS文件进行转换,然后在Python中加载文件进行处理。