I am using Spring Batch to run some calculations. In the reader I have to load a large amount of data that is then handled in the processor/writer, and this process needs a lot of memory (RAM). So I tried to split the step with a partitioner, like this:
<batch:step id="MyStep.master" >
<partition step="MyStep" partitioner="MyPartitioner">
<handler grid-size="1" task-executor="TaskExecutor" />
</partition>
</batch:step>
<batch:step id="MyStep" >
<batch:tasklet transaction-manager="transactionManager">
<batch:chunk reader="MyReader" processor="MyProcessor"
writer="MyWriter" commit-interval="1000" skip-limit="1000">
<batch:skippable-exception-classes>
<batch:include class="...FunctionalException" />
</batch:skippable-exception-classes>
</batch:chunk>
</batch:tasklet>
</batch:step>
<bean id="MyPartitioner" class="...MyPartitioner" scope="step"/>
<bean id="TaskExecutor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor" >
<bean name="MyReader"
class="org.springframework.batch.item.database.JdbcCursorItemReader"
scope="step">
<property name="dataSource" ref="dataSource" />
<property name="sql">
<value>
<![CDATA[
SELECT...
]]>
</value>
</property>
<property name="rowMapper" ref="MyRowMapper" />
</bean>
<bean id="MyRowMapper" class="...MyRowMapper" />
<bean id="dataSource" class="com.mchange.v2.c3p0.ComboPooledDataSource" destroy-method="close">
<property name="driverClass" value="org.postgresql.Driver"/>
<property name="jdbcUrl" value="jdbc:postgresql://${database.host}/${database.name}"/>
<property name="user" value="${database.user}"/>
<property name="password" value="${database.password}"/>
<property name="acquireIncrement" value="1" />
<property name="autoCommitOnClose" value="true" />
<property name="minPoolSize" value="${min.pool.size}" /> <!-- min.pool.size=5 -->
<property name="maxPoolSize" value="${max.pool.size}" /> <!-- max.pool.size=15 -->
</bean>
But the partitioning still uses a lot of memory, because the (slave) steps run in parallel. What I would like to do is split the step and run the threads consecutively (not in parallel) to reduce memory (RAM) usage. Is that possible?
Answer 0 (score: 0)
The question is a bit old, so I am not sure whether this is still useful; maybe you have already solved it yourself.
If the order in which the rows are processed does not matter to you, the solution is to run the query in your partitioner bean and pass each partition the information it needs to work on its own slice of the table, a (start_key, end_key) pair. This reduces RAM usage a lot.
A couple of caveats apply; here is an example.
The XML configuration:
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:batch="http://www.springframework.org/schema/batch"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:aop="http://www.springframework.org/schema/aop" xmlns:util="http://www.springframework.org/schema/util"
xsi:schemaLocation="http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-4.0.xsd
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-4.0.xsd
http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-4.0.xsd
http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch-3.0.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-4.0.xsd">
<!-- JOB -->
<batch:job id="printPdf" job-repository="jobRepository"
restartable="false">
<batch:step id="MyStep">
<batch:partition step="MyStep.template"
partitioner="myPartitioner" handler="partitionHandler">
</batch:partition>
</batch:step>
</batch:job>
<!-- Partitioner -->
<bean id="myPartitioner" class="foo.MyPartitioner"
scope="step">
<property name="jdbcTemplate" ref="myJdbcTemplate" />
<property name="sql"
value="Select ...." />
<property name="rowMap">
<bean
class="foo.MyPartitionHandlerRowMapper" />
</property>
<property name="preparedStatementSetter">
<bean
class="org.springframework.batch.core.resource.ListPreparedStatementSetter">
<property name="parameters">
<list>
<value>#{jobParameters['param1']}</value>
</list>
</property>
</bean>
</property>
</bean>
<bean id="partitionHandler" scope="step"
class="org.springframework.batch.core.partition.support.TaskExecutorPartitionHandler">
<property name="taskExecutor" ref="customTaskExecutor" />
<property name="gridSize" value="#{jobParameters['gridSize']}" />
<property name="step" ref="MyStep.template" />
</bean>
<bean id="customTaskExecutor"
class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
<property name="corePoolSize" value="8" />
<property name="maxPoolSize" value="8" />
<property name="waitForTasksToCompleteOnShutdown" value="true" />
<property name="awaitTerminationSeconds" value="120" />
</bean>
<batch:step id="MyStep.tempate">
<batch:tasklet transaction-manager="transactionManager">
<batch:chunk commit-interval="2500" reader="myReader"
processor="myProcessor" writer="myWriter" skip-limit="2500">
<batch:skippable-exception-classes>
<batch:include class="...FunctionalException" />
</batch:skippable-exception-classes>
</batch:chunk>
</batch:tasklet>
</batch:step>
<!-- Beans -->
<!-- Processors -->
<bean id="myProcessor" class="foo.MyProcessor"
scope="step">
</bean>
<bean id="classLoaderVerifier"
class="it.addvalue.pkjwd.services.genbean.GenericStockKeysForNoDuplicate" />
<!-- Readers -->
<bean id="myReader"
class="org.springframework.batch.item.database.JdbcCursorItemReader"
scope="step">
<property name="dataSource" ref="myDataSouce" />
<property name="sql"
value="select ... from ... where ID >= ? and ID <= ?" />
<property name="rowMapper">
<bean class="foo.MyReaderPartitionedRowMapper" />
</property>
<property name="preparedStatementSetter">
<bean
class="org.springframework.batch.core.resource.ListPreparedStatementSetter">
<property name="parameters">
<list>
<value>#{stepExecutionContext['START_ID']}</value>
<value>#{stepExecutionContext['END_ID']}</value>
</list>
</property>
</bean>
</property>
</bean>
<!-- Writers -->
<bean id="myWriter"
class="org.springframework.batch.item.database.JdbcBatchItemWriter">
<property name="assertUpdates" value="false" />
<property name="itemPreparedStatementSetter">
<bean class="foo.MyWriterStatementSetters" />
</property>
<property name="sql"
value="insert ..." />
<property name="dataSource" ref="myDataSouce" />
</bean>
</beans>
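One extra note on the other half of the question, running the slices one after another instead of in parallel: TaskExecutorPartitionHandler uses a SyncTaskExecutor by default when no taskExecutor is wired in, and a SyncTaskExecutor executes each partition sequentially in the calling thread. As a minimal sketch (the bean id is just illustrative; the step and gridSize wiring are the same as above), you could swap the handler for something like this so that only one slice is in memory at a time:
<!-- Optional: strictly sequential execution of the partitions, one slice at a time. -->
<bean id="sequentialPartitionHandler" scope="step"
    class="org.springframework.batch.core.partition.support.TaskExecutorPartitionHandler">
    <!-- SyncTaskExecutor runs each partition in the calling thread, one after another;
         leaving taskExecutor unset has the same effect, since it is the default. -->
    <property name="taskExecutor">
        <bean class="org.springframework.core.task.SyncTaskExecutor" />
    </property>
    <property name="gridSize" value="#{jobParameters['gridSize']}" />
    <property name="step" ref="MyStep.template" />
</bean>
Point the handler attribute of batch:partition at this bean instead of partitionHandler and the slaves run one by one.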
Your Partitioner bean will look like this:
package foo;
import foo.model.MyTable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.PreparedStatementSetter;
import org.springframework.jdbc.core.RowMapper;
public class MyPartitioner implements Partitioner
{
private JdbcTemplate jdbcTemplate;
private RowMapper<MyTable> rowMap;
private String sql;
private PreparedStatementSetter preparedStatementSetter;
public JdbcTemplate getJdbcTemplate()
{
return jdbcTemplate;
}
public void setJdbcTemplate(JdbcTemplate jdbcTemplate)
{
this.jdbcTemplate = jdbcTemplate;
}
public RowMapper<MyTable> getRowMap()
{
return rowMap;
}
public void setRowMap(RowMapper<MyTable> rowMap)
{
this.rowMap = rowMap;
}
public String getSql()
{
return sql;
}
public void setSql(String sql)
{
this.sql = sql;
}
public PreparedStatementSetter getPreparedStatementSetter()
{
return preparedStatementSetter;
}
public void setPreparedStatementSetter(PreparedStatementSetter preparedStatementSetter)
{
this.preparedStatementSetter = preparedStatementSetter;
}
@Override
public Map<String, ExecutionContext> partition(int gridSize)
{
Map<String, ExecutionContext> map = new HashMap<String, ExecutionContext>();
try
{
// Load the key of every row to be processed, then slice the list into gridSize key ranges.
List<MyTable> lstMyRows = jdbcTemplate.query(sql, preparedStatementSetter, rowMap);
if ( lstMyRows.size() > 0 )
{
int total = lstMyRows.size();
int rowsPerPartition = total / gridSize;
int leftovers = total % gridSize;
total = lstMyRows.size() - 1;
int startPos = 0;
int endPos = rowsPerPartition - 1;
int i = 0;
while (endPos <= (total))
{
ExecutionContext context = new ExecutionContext();
if ( endPos + leftovers == total )
{
endPos = total;
}
else if ( endPos >= (total) )
{
endPos = total;
}
context.put("START_ID", lstMyRows.get(startPos).getId());
context.put("END_ID", lstMyRows.get(endPos).getId());
map.put("PART_" + StringUtils.leftPad("" + i, ("" + gridSize).length(), '0'), context);
i++;
startPos = endPos + 1;
endPos = endPos + rowsPerPartition;
}
}
}
catch ( Exception e )
{
e.printStackTrace();
}
return map;
}
}
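To make the splitting concrete: with gridSize = 4 and, say, 10,000 rows returned by the partitioner query, rowsPerPartition is 2,500 and leftovers is 0, so the map ends up with keys PART_0 through PART_3, each ExecutionContext carrying the START_ID and END_ID of one 2,500-row slice. The ListPreparedStatementSetter of myReader then binds those two values into the "ID >= ? and ID <= ?" query, so each slave step only ever holds the cursor and the current chunk (commit-interval 2500) of its own slice in memory, instead of the whole result set.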