我有一个包含80个String列的文件,其中包含大约20万(2 Lakh)条记录。我正在评估将来要使用的技术,届时我们会收到包含约2亿(2000 Lakh)条记录的文件。我评估了以下两种方案:
Oracle 11g 数据库
方法1 - Spring Batch
配置如下
<!-- Spring Batch job: reads a comma-delimited flat file (skipping the header
     line) and bulk-inserts each chunk of 1000 records into TB_TRANS. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:batch="http://www.springframework.org/schema/batch"
       xmlns:task="http://www.springframework.org/schema/task"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/batch
                           http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
                           http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">

  <!-- One read->write step; each transaction commits 1000 items. -->
  <batch:job id="trialDataLoadJob">
    <batch:step id="step1">
      <batch:tasklet transaction-manager="transactionManager">
        <batch:chunk reader="trialInputFileReader" writer="trialJdbcWriter" commit-interval="1000"/>
      </batch:tasklet>
    </batch:step>
  </batch:job>

  <!-- Step-scoped so the input file path can come from job parameters
       (#{jobParameters[inputFile]}). linesToSkip=1 skips the header row. -->
  <bean id="trialInputFileReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
    <property name="resource" value="file:#{jobParameters[inputFile]}" />
    <property name="linesToSkip" value="1" />
    <property name="lineMapper">
      <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
        <property name="lineTokenizer" ref="trialLineTokenizer" />
        <property name="fieldSetMapper" ref="trialFieldSetMapper" />
      </bean>
    </property>
  </bean>

  <!-- Tokenizes each CSV line into named fields. NOTE: field names must match
       the Demo bean's property names and the :NAMED SQL parameters exactly.
       FIX: the original list contained " COUPON_RATE" with a leading space
       (after EFFECTIVE_DATE,), which would not match the COUPON_RATE property
       or the :COUPON_RATE parameter. strict=false tolerates short lines. -->
  <bean id="trialLineTokenizer" class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
    <property name="delimiter" value="," />
    <property name="names"
              value="RUN_DATE,ACODE,GMI_ACCOUNT,GMI_OFFICE,GMI_FIRM,ACCOUNT_NAME,CCY,LE_ID,MARKET,GMF,MIC_CODE,GMI_PRD,PRD,PRD_DESCRIPTION,BBG_BACK_OFFICE_TICKER,BBG_FRONT_OFFICE_TICKER,BBG_YELLOW_KEY,ROOT_RIC,RIC,TRADE_TYPE,PROMPT_DATE,EXPIRY_DECLARATION,LAST_TRADED_DATE,STRIKE_PRICE,OPTION_TYPE,DELIVERY_TYPE,TRADE_PRICE,DECIMAL_TRADE_PRICE,TRIAL_TRADEPRICE,CONTRACTS,NOTIONAL_FOR_FUTURES,CLEARING_BROKER,EXEC_BROKER,TRADE_DATE,TRANSACTION_TYPE,UNDERLYING,UNDERLYING_TYPE,GMI_MULTIPLIER,UTI,USI,ISIN,AII,CFI,SERIAL,DEALER_REFERENCE,TRADE_EXECUTION_ID,CLEARING_TIMESTAMP,EXECUTION_TIMESTAMP,CCP_LE_ID,SWAP_TYPE,EFFECTIVE_DATE,COUPON_RATE,DAY_COUNT_BASIS,ROLL_FREQUENCY,RESET_FREQUENCY,ACTIVITY_TYPE,EMPTY,PRD_ID_PREFIX_1,PRD_ID_PREFIX_2,TRIAL_ENTITY_NAME,TRIAL_ENTITY_LEI,TRIAL_REGION" />
    <property name="strict" value="false" />
  </bean>

  <!-- Prototype item bean; BeanWrapperFieldSetMapper creates one instance per
       line and sets properties by the tokenizer's field names. -->
  <bean id="demo" class="com.mkyong.Demo" />
  <bean id="trialFieldSetMapper" class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
    <property name="prototypeBeanName" value="demo" />
  </bean>

  <!-- Batched JDBC insert; named parameters are resolved from the item's
       bean properties via BeanPropertyItemSqlParameterSourceProvider. -->
  <bean id="trialJdbcWriter" class="org.springframework.batch.item.database.JdbcBatchItemWriter">
    <property name="dataSource" ref="dataSource" />
    <property name="sql">
      <value>
        <![CDATA[
        Insert into TB_TRANS(RUN_DATE,ACODE,GMI_ACCOUNT,GMI_OFFICE,GMI_FIRM,ACCOUNT_NAME,CCY,LE_ID,MARKET,GMF,MIC_CODE,GMI_PRD,PRD,PRD_DESCRIPTION,BBG_BACK_OFFICE_TICKER,BBG_FRONT_OFFICE_TICKER,BBG_YELLOW_KEY,ROOT_RIC,RIC,TRADE_TYPE,PROMPT_DATE,EXPIRY_DECLARATION,LAST_TRADED_DATE,STRIKE_PRICE,OPTION_TYPE,DELIVERY_TYPE,TRADE_PRICE,DECIMAL_TRADE_PRICE,TRIAL_TRADEPRICE,CONTRACTS,NOTIONAL_FOR_FUTURES,CLEARING_BROKER,EXEC_BROKER,TRADE_DATE,TRANSACTION_TYPE,UNDERLYING,UNDERLYING_TYPE,GMI_MULTIPLIER,UTI,USI,ISIN,AII,CFI,SERIAL,DEALER_REFERENCE,TRADE_EXECUTION_ID,CLEARING_TIMESTAMP,EXECUTION_TIMESTAMP,CCP_LE_ID,SWAP_TYPE,EFFECTIVE_DATE,COUPON_RATE,DAY_COUNT_BASIS,ROLL_FREQUENCY,RESET_FREQUENCY,ACTIVITY_TYPE,EMPTY,PRD_ID_PREFIX_1,PRD_ID_PREFIX_2,TRIAL_ENTITY_NAME,TRIAL_ENTITY_LEI,TRIAL_REGION)
        values (:RUN_DATE,:ACODE,:GMI_ACCOUNT,:GMI_OFFICE,:GMI_FIRM,:ACCOUNT_NAME,:CCY,:LE_ID,:MARKET,:GMF,:MIC_CODE,:GMI_PRD,:PRD,:PRD_DESCRIPTION,:BBG_BACK_OFFICE_TICKER,:BBG_FRONT_OFFICE_TICKER,:BBG_YELLOW_KEY,:ROOT_RIC,:RIC,:TRADE_TYPE,:PROMPT_DATE,:EXPIRY_DECLARATION,:LAST_TRADED_DATE,:STRIKE_PRICE,:OPTION_TYPE,:DELIVERY_TYPE,:TRADE_PRICE,:DECIMAL_TRADE_PRICE,:TRIAL_TRADEPRICE,:CONTRACTS,:NOTIONAL_FOR_FUTURES,:CLEARING_BROKER,:EXEC_BROKER,:TRADE_DATE,:TRANSACTION_TYPE,:UNDERLYING,:UNDERLYING_TYPE,:GMI_MULTIPLIER,:UTI,:USI,:ISIN,:AII,:CFI,:SERIAL,:DEALER_REFERENCE,:TRADE_EXECUTION_ID,:CLEARING_TIMESTAMP,:EXECUTION_TIMESTAMP,:CCP_LE_ID,:SWAP_TYPE,:EFFECTIVE_DATE,:COUPON_RATE,:DAY_COUNT_BASIS,:ROLL_FREQUENCY,:RESET_FREQUENCY,:ACTIVITY_TYPE,:EMPTY,:PRD_ID_PREFIX_1,:PRD_ID_PREFIX_2,:TRIAL_ENTITY_NAME,:TRIAL_ENTITY_LEI,:TRIAL_REGION)
        ]]>
      </value>
    </property>
    <property name="itemSqlParameterSourceProvider">
      <bean class="org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider" />
    </property>
  </bean>
</beans>
200,000(2 Lakh)条记录耗时118秒。
方法2 - BufferedReader 和 JDBC 批量插入(JDBC Batch)
// Bulk-load a comma-delimited flat file into the database with plain JDBC
// batching: rows are buffered and flushed via insertToDB() every batchSize
// records, all inside a single transaction that commits only on success.
Connection connection = dbManager.getConnection();
File file = new File(filePath);
BufferedReader br = null;
boolean error = false;
// Row buffer; sized generously to avoid resizing between flushes.
ArrayList<String[]> records = new ArrayList<String[]>(batchSize * 2);
try {
    // One transaction for the whole file; committed in finally only if no error.
    connection.setAutoCommit(false);
    br = new BufferedReader(new FileReader(file));
    String line;
    while ((line = br.readLine()) != null) {
        // limit -1 keeps trailing empty columns (plain split(",") would drop them)
        String[] values = line.split(",", -1);
        records.add(values);
        if (records.size() == batchSize) {
            insertToDB(records, connection);
            records.clear();
        }
    }
    // Flush the final partial batch, if any.
    if (!records.isEmpty()) {
        insertToDB(records, connection);
    }
} catch (FileNotFoundException e) {
    error = true;
    e.printStackTrace();
} catch (IOException e) {
    error = true;
    e.printStackTrace();
} catch (SQLException e) {
    error = true;
    e.printStackTrace();
    // Undo any partial batches from this run.
    DBManager.rollback(connection);
} finally {
    if (br != null) {
        try {
            br.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    if (!error) {
        DBManager.commitConnection(connection);
    }
    // BUG FIX: the original condition was inverted — it closed the connection
    // only when it was ALREADY closed, leaking the connection on every run.
    // Close it while it is still open.
    if (!DBManager.isConnectionClosed(connection)) {
        DBManager.closeConnection(connection);
    }
}
100,000(1 Lakh)条记录仅需3秒。
为什么 Spring Batch(版本3)这么慢?还是我的用法有问题?
答案 0 :(得分:0)
建议先把数据量换算成标准计数单位(例如用"百万"而不是 Lakh),这样其他人更容易理解你的数据规模。
可以尝试去掉/关闭 spring-batch 的调试日志和 System.out.println 输出,这可能会有所帮助。
我已经运行了以下记录计数的样本作业。
emp records = 2,211,840
add records = 4,423,680
spl records = 13,271,040
hibernateJobStartTime : Mon Sep 01 01:15:35 EDT 2014
hibernateJobEndTime : Mon Sep 01 02:06:55 EDT 2014