我正在使用Spring批处理将大量数据从数据库提取到CSV文件。
该程序在数据量较少的测试环境中可以正常运行,但是当我尝试在数据量较大(600万条记录)的生产环境中运行批处理时,程序会因内存不足(OutOfMemory)而中断。将分配的内存从4 GB增加到7 GB后,程序可以在几分钟内运行结束。但是我不想一味增加内存,特别是因为数据量正在急剧增加。您能帮我设置一个配置以优化内存使用并在合理的时间内完成批处理吗? 预先感谢您的帮助。
job.xml
<beans xmlns:b="http://www.springframework.org/schema/batch"
xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/batch
http://www.springframework.org/schema/batch/spring-batch.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans.xsd">
<import resource="launch-context.xml" />
<!-- Automatic increment of the job id -->
<bean class="org.springframework.batch.core.launch.support.RunIdIncrementer"
      id="idIncrementer"/>
<!-- Job execution listener bean -->
<bean class="cnav.gipur.pci.batch.job.ExtractMensJobExecutionListener"
      id="extractMensJobExecutionListener"/>
<!-- Disabled writer listener, kept for reference:
     <bean id="assureWriterListener" class="cnav.gipur.pci.batch.job.AssureWriterListener"/>
-->
<!-- Reader: step-scoped JDBC paging reader. Every SQL fragment and the sort key
     are injected from placeholder properties. -->
<bean id="assureReader" scope="step"
      class="org.springframework.batch.item.database.JdbcPagingItemReader">
    <property name="dataSource">
        <ref bean="dataSource"/>
    </property>
    <!-- Number of rows requested per page query -->
    <property name="pageSize">
        <value>10000</value>
    </property>
    <!-- Project-specific mapper turning each result row into an item -->
    <property name="rowMapper">
        <bean class="cnav.gipur.pci.batch.reader.AssureMapper"/>
    </property>
    <property name="queryProvider">
        <bean class="org.springframework.batch.item.database.support.PostgresPagingQueryProvider">
            <property name="selectClause" value="${query.sql.select.clause}"/>
            <property name="fromClause" value="${query.sql.from.clause}"/>
            <property name="whereClause" value="${query.sql.where.clause}"/>
            <property name="groupClause" value="${query.sql.group.by.clause}"/>
            <!-- Single sort key: column name mapped to its sort order -->
            <property name="sortKeys">
                <map>
                    <entry value="${query.sql.sort.key.type}" key="${query.sql.sort.key.column}"/>
                </map>
            </property>
        </bean>
    </property>
</bean>
<!-- Helper beans for the writer: a yyyyMMdd formatter and the formatted
     creation date of a java.util.Date bean, exposed as "currentDate". -->
<bean id="fastDateFormat" factory-method="getInstance"
      class="org.apache.logging.log4j.core.util.datetime.FastDateFormat">
    <constructor-arg>
        <value>yyyyMMdd</value>
    </constructor-arg>
</bean>
<!-- Result of fastDateFormat.format(new java.util.Date()) -->
<bean id="currentDate" factory-method="format" factory-bean="fastDateFormat">
    <constructor-arg>
        <bean class="java.util.Date"/>
    </constructor-arg>
</bean>
<!-- Writer: step-scoped flat-file writer that emits ';'-delimited lines -->
<bean id="assureWriter" scope="step"
      class="org.springframework.batch.item.file.FlatFileItemWriter">
    <property name="resource">
        <ref bean="outputResource"/>
    </property>
    <property name="headerCallback">
        <ref bean="assureHeaderCallback"/>
    </property>
    <property name="lineAggregator">
        <bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
            <property name="delimiter">
                <value>;</value>
            </property>
            <!-- Project-specific extractor that supplies the fields of each item -->
            <property name="fieldExtractor">
                <bean class="cnav.gipur.pci.batch.writer.AssureFieldExtractor"/>
            </property>
        </bean>
    </property>
</bean>
<!-- Output file: the path embeds the "currentDate" bean through a SpEL expression -->
<bean class="org.springframework.core.io.FileSystemResource" id="outputResource">
    <constructor-arg value="./ficout/#{currentDate}_Extraction_Comptes_PCI.csv"/>
</bean>
<!-- Header callback referenced by the writer's headerCallback property -->
<bean class="cnav.gipur.pci.batch.writer.AssureHeaderCallback"
      id="assureHeaderCallback"/>
<!-- Job: one chunk-oriented step that reads with assureReader and writes with
     assureWriter; the run id is supplied by idIncrementer. -->
<b:job id="extractionMensJob" incrementer="idIncrementer">
    <b:listeners>
        <b:listener ref="extractMensJobExecutionListener" />
    </b:listeners>
    <b:step id="readWrite">
        <b:tasklet transaction-manager="transactionManager">
            <!-- commit-interval must be a quoted attribute value; unquoted, the file
                 is not well-formed XML and cannot be parsed.
                 NOTE(review): the items of a chunk are buffered until the chunk is
                 written, so a smaller commit-interval is one lever for reducing
                 memory use; confirm with profiling before changing it. -->
            <b:chunk reader="assureReader" writer="assureWriter" commit-interval="10000" />
        </b:tasklet>
        <!-- Disabled step listener, kept for reference:
        <b:listeners>
            <b:listener ref="assureWriterListener" />
        </b:listeners>
        -->
    </b:step>
</b:job>
</beans>
launch-context.xml
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:p="http://www.springframework.org/schema/p" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans.xsd">
<!-- DBCP2 pooled data source; the connection settings come from placeholder properties -->
<bean id="dataSource"
      class="org.apache.commons.dbcp2.BasicDataSource"
      p:driverClassName="${batch.jdbc.driver}"
      p:url="${batch.jdbc.url}"
      p:username="${batch.jdbc.user}"
      p:password="${batch.jdbc.password}"/>
<!-- Hibernate 5 session factory built on dataSource.
     NOTE(review): both classes listed under annotatedClasses belong to the package
     already given to packagesToScan, so the explicit list looks redundant; confirm
     before removing either. -->
<bean id="sessionFactory"
      class="org.springframework.orm.hibernate5.LocalSessionFactoryBean"
      p:dataSource-ref="dataSource"
      p:packagesToScan="cnav.gipur.pci.batch.model">
    <!-- Schema-generation mode and SQL dialect, both taken from placeholder properties -->
    <property name="hibernateProperties">
        <props>
            <prop key="hibernate.hbm2ddl.auto">${hibernate.hbm2ddl.auto}</prop>
            <prop key="hibernate.dialect">${hibernate.dialect}</prop>
        </props>
    </property>
    <property name="annotatedClasses">
        <list>
            <value>cnav.gipur.pci.batch.model.Assure</value>
            <value>cnav.gipur.pci.batch.model.Affiliation</value>
        </list>
    </property>
</bean>
<!-- Hibernate transaction manager, bound to sessionFactory and dataSource -->
<bean id="transactionManager"
      class="org.springframework.orm.hibernate5.HibernateTransactionManager"
      p:sessionFactory-ref="sessionFactory"
      p:dataSource-ref="dataSource"/>
<!-- Transaction manager dedicated to the job repository -->
<bean class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"
      id="repositoryTransactionManager"/>
<!-- Map-based job repository factory wired to that transaction manager -->
<bean id="jobRepository"
      class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
    <property name="transactionManager" ref="repositoryTransactionManager"/>
</bean>
<!-- Job operator assembled from the launcher, explorer, repository and registry beans -->
<bean id="jobOperator"
      class="org.springframework.batch.core.launch.support.SimpleJobOperator">
    <property name="jobLauncher" ref="simpleJobLauncher"/>
    <property name="jobExplorer" ref="jobExplorer"/>
    <property name="jobRepository" ref="jobRepository"/>
    <property name="jobRegistry" ref="jobRegistry"/>
</bean>
<!-- Job explorer backed by the same Map-based store as jobRepository.
     jobRepository is a MapJobRepositoryFactoryBean, so job metadata is not written
     through dataSource; a JDBC explorer pointed at dataSource would read a different
     store from the one the repository writes. The "&amp;" prefix references the
     factory bean itself instead of the JobRepository it produces. -->
<bean id="jobExplorer"
      class="org.springframework.batch.core.explore.support.MapJobExplorerFactoryBean">
    <property name="repositoryFactory" ref="&amp;jobRepository"/>
</bean>
<!-- Job registry, plus the bean post-processor that is given this registry -->
<bean class="org.springframework.batch.core.configuration.support.MapJobRegistry"
      id="jobRegistry"/>
<bean class="org.springframework.batch.core.configuration.support.JobRegistryBeanPostProcessor"
      p:jobRegistry-ref="jobRegistry"/>
<!-- Job launcher wired to jobRepository -->
<bean id="simpleJobLauncher"
      class="org.springframework.batch.core.launch.support.SimpleJobLauncher"
      p:jobRepository-ref="jobRepository"/>
<!-- Resolves ${...} placeholders from batch-default.properties on the classpath -->
<bean id="placeholderProperties"
      class="org.springframework.context.support.PropertySourcesPlaceholderConfigurer"
      p:location="classpath:batch-default.properties"/>
</beans>
Spring Batch版本:
4.4.2
以下是一些实现详细信息:
读取器从数据库中读取数据。处理器中没有任何处理,因为所有处理都是通过SQL查询完成的。写入器(writer)将结果导出到CSV文件。