我正在尝试读取Excel文件并将数据批量上传到SQL Server表。数据加载工作正常,但花费的时间比预期的要长:28,000条记录需要80秒。我以后还需要多次加载2-3 GB的文件。我是Python新手,你能看看这个脚本,帮我优化以提高性能吗?
import os.path
import pandas as pd
import sqlalchemy
import timeit
def _get_file_path():
print('Loading GL Delta file Path... ')
file_name = \
'FormPF_Paloma_GLExtract_07312017.xlsx'
basedir = 'C://Users/XXXXX/PycharmProjects/ETLApplication/Extraction/XXYY/GL/InputFile'
full_path = os.path.join(basedir, file_name)
print('Loading File Path Completed.')
return full_path
def _get_connection():
    """Create a SQLAlchemy engine for the target SQL Server database.

    ``fast_executemany=True`` is the key bulk-load optimization for the
    mssql+pyodbc dialect: it makes pyodbc send batched INSERT parameter
    sets to the server in a single round trip instead of one round trip
    per row, which is typically the dominant cost of ``DataFrame.to_sql``
    on large frames.

    :return: a lazily-connecting SQLAlchemy engine.
    """
    engine = sqlalchemy.create_engine(
        'mssql+pyodbc://XXXXXXXXX?driver=SQL+Server+Native+Client+11.0',
        fast_executemany=True,
    )
    return engine
def _process_file():
    """Read the GL delta Excel file and append it to the SQL Server table.

    Performance notes versus the original implementation:
    - ``pd.read_excel`` reads the sheet in one call (equivalent to
      ``ExcelFile`` + ``parse`` but simpler).
    - The full DataFrame is no longer printed: rendering tens of
      thousands of rows to stdout is pure overhead. Only the shape is
      reported.
    - ``to_sql`` is given ``chunksize`` so multi-GB frames are written
      in batches instead of building one giant parameter set in memory.
    """
    sheet_name = 'Sheet1'
    table_name = 'XXXX_GL_File'
    connection = _get_connection()
    file_path = _get_file_path()
    print('Established Connection..')
    print('Reading GL Delta Excel File.....!!!!')
    gl_data_frame = pd.read_excel(file_path, sheet_name=sheet_name,
                                  index_col='ID')
    print('Rows read: %d, columns: %d' % gl_data_frame.shape)
    print('inserting GL Delta data in Database..')
    gl_data_frame.to_sql(table_name, connection, if_exists='append',
                         chunksize=10000)
    print('GL Delta Data loaded Successfully!')
def __main__():
try:
start_time = timeit.default_timer()
print('#### STARTING DATA LOADING PROCESS #############')
_process_file()
elapsed_time = timeit.default_timer() - start_time
print('#### Total Execution Time in milliseconds: #############')
print(elapsed_time)
except:
print('Exception Occurred! ')
if __name__ == '__main__':
__main__()