What explains Spring Batch's performance compared to BufferedReader and JDBC batch inserts?

Asked: 2014-09-26 07:36:22

Tags: spring-batch bulkinsert bulkloader

I have a file with 80 String columns containing about 2 lakh (200,000) records. I am evaluating technology for future use, when we will receive files of 2,000 lakh (200 million) records. I evaluated two things.

Oracle 11g database

Approach 1 - Spring Batch

The configuration is as follows:

<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:batch="http://www.springframework.org/schema/batch" xmlns:task="http://www.springframework.org/schema/task"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.springframework.org/schema/batch
        http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
        http://www.springframework.org/schema/beans 
        http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">

    <batch:job id="trialDataLoadJob">
        <batch:step id="step1">
            <batch:tasklet transaction-manager="transactionManager">
                <batch:chunk reader="trialInputFileReader" writer="trialJdbcWriter" commit-interval="1000"/>
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <bean id="trialInputFileReader" class="org.springframework.batch.item.file.FlatFileItemReader"  scope="step">
        <property name="resource" value="file:#{jobParameters[inputFile]}" />
        <property name="linesToSkip" value="1" />
        <property name="lineMapper">
            <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
                <property name="lineTokenizer" ref="trialLineTokenizer" />
                <property name="fieldSetMapper" ref="trialFieldSetMapper" />
            </bean>
        </property>
    </bean>


    <bean id="trialLineTokenizer" class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
        <property name="delimiter" value="," />
        <property name="names"
            value="RUN_DATE,ACODE,GMI_ACCOUNT,GMI_OFFICE,GMI_FIRM,ACCOUNT_NAME,CCY,LE_ID,MARKET,GMF,MIC_CODE,GMI_PRD,PRD,PRD_DESCRIPTION,BBG_BACK_OFFICE_TICKER,BBG_FRONT_OFFICE_TICKER,BBG_YELLOW_KEY,ROOT_RIC,RIC,TRADE_TYPE,PROMPT_DATE,EXPIRY_DECLARATION,LAST_TRADED_DATE,STRIKE_PRICE,OPTION_TYPE,DELIVERY_TYPE,TRADE_PRICE,DECIMAL_TRADE_PRICE,TRIAL_TRADEPRICE,CONTRACTS,NOTIONAL_FOR_FUTURES,CLEARING_BROKER,EXEC_BROKER,TRADE_DATE,TRANSACTION_TYPE,UNDERLYING,UNDERLYING_TYPE,GMI_MULTIPLIER,UTI,USI,ISIN,AII,CFI,SERIAL,DEALER_REFERENCE,TRADE_EXECUTION_ID,CLEARING_TIMESTAMP,EXECUTION_TIMESTAMP,CCP_LE_ID,SWAP_TYPE,EFFECTIVE_DATE, COUPON_RATE,DAY_COUNT_BASIS,ROLL_FREQUENCY,RESET_FREQUENCY,ACTIVITY_TYPE,EMPTY,PRD_ID_PREFIX_1,PRD_ID_PREFIX_2,TRIAL_ENTITY_NAME,TRIAL_ENTITY_LEI,TRIAL_REGION" />
        <property name="strict" value="false"></property>
    </bean>

    <bean id="demo" class="com.mkyong.Demo"></bean>


        <bean id= "trialFieldSetMapper" class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
            <property name="prototypeBeanName" value="demo" />
        </bean>
        <bean id="trialJdbcWriter" class="org.springframework.batch.item.database.JdbcBatchItemWriter">
        <property name="dataSource" ref="dataSource" />

        <property name="sql">
            <value>
            <![CDATA[        
            Insert into TB_TRANS(RUN_DATE,ACODE,GMI_ACCOUNT,GMI_OFFICE,GMI_FIRM,ACCOUNT_NAME,CCY,LE_ID,MARKET,GMF,MIC_CODE,GMI_PRD,PRD,PRD_DESCRIPTION,BBG_BACK_OFFICE_TICKER,BBG_FRONT_OFFICE_TICKER,BBG_YELLOW_KEY,ROOT_RIC,RIC,TRADE_TYPE,PROMPT_DATE,EXPIRY_DECLARATION,LAST_TRADED_DATE,STRIKE_PRICE,OPTION_TYPE,DELIVERY_TYPE,TRADE_PRICE,DECIMAL_TRADE_PRICE,TRIAL_TRADEPRICE,CONTRACTS,NOTIONAL_FOR_FUTURES,CLEARING_BROKER,EXEC_BROKER,TRADE_DATE,TRANSACTION_TYPE,UNDERLYING,UNDERLYING_TYPE,GMI_MULTIPLIER,UTI,USI,ISIN,AII,CFI,SERIAL,DEALER_REFERENCE,TRADE_EXECUTION_ID,CLEARING_TIMESTAMP,EXECUTION_TIMESTAMP,CCP_LE_ID,SWAP_TYPE,EFFECTIVE_DATE, COUPON_RATE,DAY_COUNT_BASIS,ROLL_FREQUENCY,RESET_FREQUENCY,ACTIVITY_TYPE,EMPTY,PRD_ID_PREFIX_1,PRD_ID_PREFIX_2,TRIAL_ENTITY_NAME,TRIAL_ENTITY_LEI,TRIAL_REGION) 
            values (:RUN_DATE,:ACODE,:GMI_ACCOUNT,:GMI_OFFICE,:GMI_FIRM,:ACCOUNT_NAME,:CCY,:LE_ID,:MARKET,:GMF,:MIC_CODE,:GMI_PRD,:PRD,:PRD_DESCRIPTION,:BBG_BACK_OFFICE_TICKER,:BBG_FRONT_OFFICE_TICKER,:BBG_YELLOW_KEY,:ROOT_RIC,:RIC,:TRADE_TYPE,:PROMPT_DATE,:EXPIRY_DECLARATION,:LAST_TRADED_DATE,:STRIKE_PRICE,:OPTION_TYPE,:DELIVERY_TYPE,:TRADE_PRICE,:DECIMAL_TRADE_PRICE,:TRIAL_TRADEPRICE,:CONTRACTS,:NOTIONAL_FOR_FUTURES,:CLEARING_BROKER,:EXEC_BROKER,:TRADE_DATE,:TRANSACTION_TYPE,:UNDERLYING,:UNDERLYING_TYPE,:GMI_MULTIPLIER,:UTI,:USI,:ISIN,:AII,:CFI,:SERIAL,:DEALER_REFERENCE,:TRADE_EXECUTION_ID,:CLEARING_TIMESTAMP,:EXECUTION_TIMESTAMP,:CCP_LE_ID,:SWAP_TYPE,:EFFECTIVE_DATE,:COUPON_RATE,:DAY_COUNT_BASIS,:ROLL_FREQUENCY,:RESET_FREQUENCY,:ACTIVITY_TYPE,:EMPTY,:PRD_ID_PREFIX_1,:PRD_ID_PREFIX_2,:TRIAL_ENTITY_NAME,:TRIAL_ENTITY_LEI,:TRIAL_REGION)
            ]]>
            </value>
        </property>
        <property name="itemSqlParameterSourceProvider">
            <bean
                class="org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider" />
        </property>
    </bean>
</beans>

200,000 records took 118 seconds.
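For reference, the Spring Batch timing was presumably measured around a launch roughly like the following. This is a minimal sketch, not the poster's actual launcher: the context file name trial-job-context.xml is hypothetical, and the dataSource, transactionManager, jobRepository and jobLauncher beans are assumed to be defined elsewhere.

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class TrialJobRunner {
    public static void main(String[] args) throws Exception {
        ApplicationContext ctx =
                new ClassPathXmlApplicationContext("trial-job-context.xml");
        JobLauncher launcher = ctx.getBean(JobLauncher.class);
        Job job = ctx.getBean("trialDataLoadJob", Job.class);

        // the step-scoped reader resolves #{jobParameters[inputFile]} from here
        JobParameters params = new JobParametersBuilder()
                .addString("inputFile", args[0])
                .toJobParameters();

        JobExecution execution = launcher.run(job, params);
        System.out.println("Exit status: " + execution.getStatus());
    }
}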

Approach 2 - BufferedReader & JDBC Batch

Connection connection = dbManager.getConnection();
File file = new File(filePath);
BufferedReader br = null;
boolean error = false;
ArrayList<String[]> records = new ArrayList<String[]>(batchSize * 2);
try {
    connection.setAutoCommit(false);
    br = new BufferedReader(new FileReader(file));
    String line;
    while ((line = br.readLine()) != null) {
        // -1 keeps trailing empty fields so every row has the full column count
        String[] values = line.split(",", -1);
        records.add(values);
        if (records.size() == batchSize) {
            insertToDB(records, connection);
            records.clear();
        }
    }

    // flush the final, partially filled batch
    if (records.size() > 0) {
        insertToDB(records, connection);
    }

} catch (FileNotFoundException e) {
    error = true;
    e.printStackTrace();
} catch (IOException e) {
    error = true;
    e.printStackTrace();
} catch (SQLException e) {
    error = true;
    e.printStackTrace();
    DBManager.rollback(connection);
} finally {
    if (br != null) {
        try {
            br.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    if (!error) {
        DBManager.commitConnection(connection);
    }

    // close the connection if it is still open
    if (!DBManager.isConnectionClosed(connection)) {
        DBManager.closeConnection(connection);
    }
}
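The insertToDB helper is not shown in the question. Presumably it binds each String[] positionally to a PreparedStatement and flushes one JDBC batch per call; a minimal sketch, assuming an INSERT_SQL constant holding the same INSERT statement as approach 1 but with positional ? placeholders instead of named parameters:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;

// Hypothetical reconstruction of insertToDB: one addBatch() per row,
// one executeBatch() (i.e. one driver round trip) per batchSize rows.
static void insertToDB(ArrayList<String[]> records, Connection connection)
        throws SQLException {
    PreparedStatement ps = connection.prepareStatement(INSERT_SQL);
    try {
        for (String[] values : records) {
            for (int i = 0; i < values.length; i++) {
                ps.setString(i + 1, values[i]); // all columns are Strings
            }
            ps.addBatch();
        }
        ps.executeBatch();
    } finally {
        ps.close();
    }
}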

100,000 records took 3 seconds.

Why is the Spring Batch version so slow? I expected the opposite.

1 Answer:

Answer 0 (score: 0)

Please state the data load counts in standard numbers (e.g., millions rather than lakhs); it would help others understand the data volumes involved.

Try removing or disabling debug logging and System.out.println statements from the Spring Batch run; that may help.
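For example, if Log4j 1.x is the logging backend (an assumption; the question does not say which one is used), the chatty Spring output can be suppressed before the timed run:

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

// Assumption: Log4j 1.x is on the classpath. Raise the level so
// per-chunk DEBUG output from Spring does not distort the measurement.
Logger.getLogger("org.springframework").setLevel(Level.WARN);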

I have run a sample job with the following record counts:

emp records = 2,211,840
add records = 4,423,680
spl records = 13,271,040
hibernateJobStartTime : Mon Sep 01 01:15:35 EDT 2014
hibernateJobEndTime : Mon Sep 01 02:06:55 EDT 2014