我有以下带有批处理步骤的spring批处理作业,该步骤为分区步骤创建3600个分区。我使用的ThreadPoolTaskExecutor的最大池大小为100,队列容量为100(尽管这对速度没有影响)。我正在使用Visual VM监视线程,我注意到taskExecutor线程直到开始作业后超过5分钟才启动。
奇怪的是,如果我将分区数限制为100,则线程会很快启动并在大约一分钟内完成。
我注意到的另一个问题是,VisualVM 线程可视化中似乎没有一个以上的数据库连接
有人可以在下面查看我的批处理作业,并告诉我是否缺少将数据库连接数限制为1的内容?另外,如果我的ThreadPoolTaskExecutor参数没有更改,为什么添加更多分区会影响性能?难道这些工作不应该排在队列中,直到有一个线程可以为他们提供服务?
---春季批处理作业---
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:batch="http://www.springframework.org/schema/batch"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:util="http://www.springframework.org/schema/util"
xmlns:lang="http://www.springframework.org/schema/lang"
xsi:schemaLocation="http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-3.0.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.0.xsd
http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-3.0.xsd
http://www.springframework.org/schema/lang http://www.springframework.org/schema/lang/spring-lang-4.0.xsd">
<context:property-placeholder location="${ext.properties.dataManipulation.Properties}"/>
<import resource="${ext.properties.dataManipulation.Connection}"/>
<import resource="flatFileLineProperties.xml"/>
<!-- JobRepository and JobLauncher are configuration/setup classes -->
<bean id="jobRepository"
class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean"/>
<bean id="transactionManager"
class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"/>
<bean id="jobLauncher"
class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository"/>
</bean>
<util:map id="keys" map-class="java.util.HashMap">
<entry key="SAILING_ID" value="ASCENDING" value-type="org.springframework.batch.item.database.Order"/>
<entry key="RES_ID" value="ASCENDING" value-type="org.springframework.batch.item.database.Order" />
</util:map>
<!-- Here is my partioned step -->
<bean id="reservationsItemReader" class="org.springframework.batch.item.database.JdbcPagingItemReader" scope="step">
<property name="dataSource" ref="dataSource" />
<property name="queryProvider">
<bean class="org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean">
<property name="dataSource" ref="dataSource" />
<property name="selectClause">
<value>
<![CDATA[
GUEST_ID,
FIRST_NAME,
LAST_NAME,
TITLE,
HOUSEHOLD_NAME,
SAILING_ID,
RES_ID
]]>
</value>
</property>
<property name="fromClause" value="FROM RESERVATION "/>
<property name="whereClause" >
<value>
<![CDATA[ AND SAIL_ID = :sailId
]]>
</value>
</property>
<!--<property name="sortKey" value="SAILING_ID" />-->
<property name="sortKeys" ref="keys"/>
<!--<property name="sortKeys" ref="sortKeys"/>-->
</bean>
</property>
<property name="parameterValues">
<map>
<!--<entry key="shipCode" value="#{stepExecutionContext[shipCode]}" />-->
<entry key="sailId" value="#{stepExecutionContext[sailId]}" />
</map>
</property>
<!--property name="pageSize" value="500000" /-->
<property name="pageSize" value="40000" />
<property name="rowMapper">
<bean class="com.ncl.endeca.mapper.ColumnToHashMapper" />
</property>
</bean>
<bean id="sortKeys" class="java.util.HashMap" scope="prototype" >
<constructor-arg>
<map key-type="java.lang.String" value-type="org.springframework.batch.item.database.Order">
<entry key="SAILING_ID" value="ASCENDING" />
<entry key="RES_ID" value="ASCENDING" />
</map>
</constructor-arg>
</bean>
<util:list id="client_fields" value-type="java.lang.String">
<value>FIRST_NAME</value>
<value>LAST_NAME</value>
<value>TITLE</value>
<value>HOUSEHOLD_NAME</value>
</util:list>
<bean id="reservationsItemWriter" class="com.ncl.endeca.writer.ReservationWriter" scope="step">
<property name="guestFields" ref="client_fields" />
<property name="outPrefix" value="${file.out.prefix}" />
<property name="shipCode" value="#{stepExecutionContext[shipCode]}" />
<property name="sailId" value="#{stepExecutionContext[sailId]}" />
<property name="soldOutSailings" ref="soldOutSailingsList" />
</bean>
<bean id="yearsAgo" class="java.lang.Integer">
<constructor-arg>
<value>${yearsAgo}</value>
</constructor-arg>
</bean>
<bean id="yearsAhead" class="java.lang.Integer">
<constructor-arg>
<value>${yearsAhead}</value>
</constructor-arg>
</bean>
<bean id="resPartitioner" class="com.ncl.endeca.partition.ReservationPartitioner">
<property name="yearsAgo" ref="yearsAgo" />
<property name="yearsAhead" ref="yearsAhead" />
<property name="batchLimit" value="${batch.limit}" />
<property name="dataSource" ref="dataSource"/>
</bean>
<bean id="taskExecutor"
class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
<property name="corePoolSize" value="${batch.corePoolSize}" />
<property name="maxPoolSize" value="${batch.maxPoolSize}" />
<property name="queueCapacity" value="${batch.queueCapacity}" />
</bean>
<!--<bean id="taskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor"/>-->
<!-- each thread will run this job, with different stepExecutionContext values. -->
<step id="slave" xmlns="http://www.springframework.org/schema/batch" >
<flow parent="readReservations"/>
</step>
<!--<bean id="countrySpecificCompletionPolicy" class="org.springframework.batch.core.resource.StepExecutionSimpleCompletionPolicy">-->
<!--<property name="keyName" value="sailId"/>-->
<!--</bean>-->
<batch:flow id="readReservations">
<batch:step id="reservations" xmlns="http://www.springframework.org/schema/batch" >
<tasklet throttle-limit="${batch.corePoolSize}">
<chunk reader="reservationsItemReader" writer="reservationsItemWriter" commit-interval="50000" />
</tasklet>
</batch:step>
</batch:flow>
<!-- Actual Job -->
<batch:job id="dataManipulationJob">
<batch:step id="masterStep">
<batch:partition step="slave" partitioner="resPartitioner">
<batch:handler grid-size="100" task-executor="taskExecutor" />
</batch:partition>
</batch:step>
</batch:job>
我已经尝试过BasicDataSource和Hikari连接,但是在监视VisualVM时池大小没有影响
---- connection.xml ----
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:jdbc="http://www.springframework.org/schema/jdbc"
xmlns:batch="http://www.springframework.org/schema/batch"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:util="http://www.springframework.org/schema/util"
xmlns:lang="http://www.springframework.org/schema/lang"
xsi:schemaLocation="http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-3.0.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.0.xsd
http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-3.0.xsd
http://www.springframework.org/schema/lang http://www.springframework.org/schema/lang/spring-lang-4.0.xsd">
<bean id="hikariConfig" class="com.zaxxer.hikari.HikariConfig">
<property name="poolName" value="springHikariCP" />
<property name="connectionTestQuery" value="SELECT 10 from dual" />
<property name="dataSourceClassName" value="${hibernate.dataSourceClassName}" />
<property name="maximumPoolSize" value="${batch.maxPoolSize}" />
<property name="idleTimeout" value="${hibernate.hikari.idleTimeout}" />
<property name="dataSourceProperties">
<props>
<prop key="url">${dataSource.url}</prop>
<prop key="user">${dataSource.username}</prop>
<prop key="password">${dataSource.password}</prop>
</props>
</property>
</bean>
<!-- HikariCP configuration -->
<!--<bean id="dataSource" class="com.zaxxer.hikari.HikariDataSource" destroy-method="close">-->
<!--<constructor-arg ref="hikariConfig" />-->
<!--</bean>-->
<!--<bean id="dataSource" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close" scope="step">-->
<!--<property name="driverClassName" value="${hibernate.dataSourceClassName}" />-->
<!--<property name="url" value="${dataSource.url}" />-->
<!--<property name="username" value="${dataSource.username}" />-->
<!--<property name="password" value="${dataSource.password}" />-->
<!--<property name="testWhileIdle" value="false"/>-->
<!--<property name="maxActive" value="${batch.corePoolSize}"/>-->
<!--</bean>-->
<!-- connect to database -->
<bean id="dataSource"
class="org.springframework.jdbc.datasource.DriverManagerDataSource" >
<property name="driverClassName" value="oracle.jdbc.OracleDriver" />
<property name="url" value="jdbc:oracle:thin:@******" />
<property name="username" value="****" />
<property name="password" value="****" />
</bean>
答案 0 :(得分:0)
创建分区的时间取决于ReservationPartitioner
的性能以及分区的数量。创建3600分区意味着创建3600 StepExecution
/ ExecutionContext
对象并将它们持久保存在相应的表中。如此大量的分区可能需要花费时间。
关于数据库连接,您正在将MapJobRepositoryFactoryBean
与ResourcelessTransactionManager
一起使用,因此与Spring Batch元数据的数据库没有交互。根据您的配置与数据库交互的唯一组件是JdbcPagingItemReader
(我不知道您的ReservationWriter
的类型是什么),因此看到单个数据库连接就不足为奇了。 / p>