我构架了一些代码,将某些xlsb文件转换为csv文件,并使用此功能遍历路径列表并将200多个文件转换为csv。
import os
import win32com.client
import subprocess
from pathlib import Path, PurePath
def xl_file_to_csv(xl_file_path, save_path_raw, worksheet_name):
"""
Open a workbook and save sheet as csv files
:param xl_file_path: workbook path
:return: csv file name
:save_path_raw: raw string save path
:worksheet_name: raw string worksheet name
:output_csv_path: Outputs csv path
"""
#
FNULL = open(os.devnull, 'w')
# Sets save_path as Path class and returns file name w/o suffix from path and initializes file names
save_path = save_path_raw
file_name = Path(xl_file_path).stem
file_name = file_name + ' - ' + worksheet_name + '.csv'
output_csv_path = save_path / file_name
# Checks if current file exists and calls kill excel process just in case
exist_check = os.path.isfile(save_path / file_name)
if exist_check:
pass
subprocess.call('taskkill.exe /f /im EXCEL.EXE', stdout=FNULL,
stderr=subprocess.STDOUT)
else:
# kills excel.exe process just in case
subprocess.call('taskkill.exe /f /im EXCEL.EXE', stdout=FNULL,
stderr=subprocess.STDOUT)
# Opens and initializes excel
xl_app = win32com.client.Dispatch("Excel.Application")
xl_app.Visible = 0
xl_app.DisplayAlerts = 0
# Opens workbook and work sheet
work_book = xl_app.Workbooks.Open(xl_file_path)
work_sheet = work_book.Worksheets(worksheet_name)
# Saves as csv in excel and quits
work_sheet.SaveAs(output_csv_path, 6)
work_book.Close(SaveChanges=0)
xl_app.Quit()
# kills excel.exe process
subprocess.call('taskkill.exe /f /im EXCEL.EXE', stdout=FNULL,
stderr=subprocess.STDOUT)
return output_csv_path
但是,此函数似乎会严重泄漏内存(没有子进程功能),所以我使用子进程杀死EXCEL.EXE的每个迭代都是xl_app.Quit()似乎没有结束excel进程,这似乎就像一个极端的解决方案。我想知道是否有更好的方法解决这个问题?
编辑:该函数在如下的for循环中被调用:
for index, row in df.interrows():
output_csv_path = xl_file_to_csv(row['path'], save_path, 'Worksheet')