Python数据加载速度慢

时间:2018-01-09 22:38:10

标签: python-3.x performance pandas parallel-processing sqlalchemy

我正在尝试读取Excel文件,并将其中的数据批量上传到SQL Server表。数据加载本身工作正常,但花费的时间比预期的要长:28,000条记录需要80秒。我需要多次加载2-3 GB的文件。我是Python新手,能否请您看看下面的脚本,并告诉我如何优化它以提高性能?

import os.path
import pandas as pd
import sqlalchemy
import timeit


def _get_file_path():
    """Return the full path of the GL Delta Excel extract.

    The input directory and workbook name are hard-coded and combined with
    ``os.path.join``. A progress message is printed before and after the
    path is assembled.

    Returns:
        The joined directory and file name as a string.
    """
    input_dir = 'C://Users/XXXXX/PycharmProjects/ETLApplication/Extraction/XXYY/GL/InputFile'
    workbook = 'FormPF_Paloma_GLExtract_07312017.xlsx'

    print('Loading GL Delta file Path... ')
    gl_path = os.path.join(input_dir, workbook)
    print('Loading File Path Completed.')
    return gl_path


def _get_connection():
    """Build and return the SQLAlchemy engine used for the SQL Server load.

    The engine is created from a hard-coded ``mssql+pyodbc`` URL that
    selects the "SQL Server Native Client 11.0" ODBC driver.

    Returns:
        The object returned by ``sqlalchemy.create_engine``.
    """
    # NOTE(review): SQLAlchemy's mssql+pyodbc dialect documents a
    # ``fast_executemany=True`` engine option for bulk inserts -- confirm
    # the installed SQLAlchemy/pyodbc/driver versions support it before
    # enabling it here.
    connection_url = 'mssql+pyodbc://XXXXXXXXX?driver=SQL+Server+Native+Client+11.0'
    return sqlalchemy.create_engine(connection_url)


def _process_file():
    """Load the GL Delta Excel sheet and append its rows to the SQL table.

    Reads sheet ``Sheet1`` of the workbook located by ``_get_file_path()``,
    using the ``ID`` column as the frame index, prints the resulting frame,
    and appends it to table ``XXXX_GL_File`` through the engine returned by
    ``_get_connection()``.

    The workbook handle is closed as soon as the sheet has been parsed, and
    the engine is disposed when the function exits (also on failure), so
    repeated calls do not accumulate open files or pooled connections.

    Raises:
        Any exception raised while opening/parsing the workbook or while
        writing to the database is propagated to the caller.
    """
    sheet_name = 'Sheet1'
    table_name = 'XXXX_GL_File'
    connection = _get_connection()
    try:
        file_path = _get_file_path()
        print('Established Connection..')
        print('Reading GL Delta Excel File.....!!!!')
        # The context manager closes the workbook once parsing is done;
        # the parsed frame no longer needs the file.
        with pd.ExcelFile(file_path) as xls:
            gl_data_frame = xls.parse(sheet_name, index_col="ID")
        print(gl_data_frame)
        print('inserting GL Delta data in Database..')

        gl_data_frame.to_sql(table_name, connection, if_exists='append')
        print('GL Delta Data loaded Successfully!')
    finally:
        # Release the engine's pooled connections; each call creates a new
        # engine, so leaving it undisposed would leak connections.
        connection.dispose()


def __main__():
    """Run the load once and print the total execution time in milliseconds.

    Any ``Exception`` raised by ``_process_file()`` is handled at this
    top-level boundary: the failure message is printed and the traceback is
    written to stderr so the cause can be diagnosed. ``KeyboardInterrupt``
    and ``SystemExit`` are not intercepted. No timing is printed when the
    load fails.
    """
    # Imported locally so this function does not depend on a file-level import.
    import traceback

    start_time = timeit.default_timer()
    print('#### STARTING DATA LOADING PROCESS #############')
    try:
        _process_file()
    except Exception:
        print('Exception Occurred! ')
        # Show what actually went wrong instead of hiding it.
        traceback.print_exc()
    else:
        # default_timer() measures seconds; convert so the value matches
        # the "milliseconds" label printed below.
        elapsed_time = (timeit.default_timer() - start_time) * 1000
        print('#### Total Execution Time in milliseconds: #############')
        print(elapsed_time)

# Run the load only when this file is executed as a script, not on import.
if __name__ == '__main__':
    __main__()

0 个答案:

没有答案