Spring Batch分区步骤

时间:2015-07-02 01:19:24

标签: java spring spring-batch

我有多个CSV文件要阅读。我希望一次完成一个文件的处理。而不是阅读所有记录,直到达到提交级别。

我已经整理了一个使用分区的作业但是在运行作业时我发现每行有两个条目。好像工作正在运行两次。

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:context="http://www.springframework.org/schema/context" xmlns:p="http://www.springframework.org/schema/p"
    xmlns:batch="http://www.springframework.org/schema/batch" xmlns:mvc="http://www.springframework.org/schema/mvc"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.springframework.org/schema/beans  
http://www.springframework.org/schema/beans/spring-beans-4.0.xsd  
http://www.springframework.org/schema/context  
http://www.springframework.org/schema/context/spring-context-4.0.xsd  
http://www.springframework.org/schema/mvc  
http://www.springframework.org/schema/mvc/spring-mvc-4.0.xsd  
http://www.springframework.org/schema/batch   
http://www.springframework.org/schema/batch/spring-batch-2.2.xsd">

    <import resource="classpath:/database.xml" />



     <bean id="asyncTaskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor" >
     <property name="concurrencyLimit" value="1"></property>
     </bean>  

     <bean id="taskExecutor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
        <property name="corePoolSize" value="5" />
    </bean>

     <bean id="partitioner" class="org.springframework.batch.core.partition.support.MultiResourcePartitioner" scope="step">
        <property name="resources" value="file:#{jobParameters[filePath]}/*.dat" />
    </bean>

    <bean id="multiResourceReader"
        class="org.springframework.batch.item.file.MultiResourceItemReader"
        scope="step">
        <property name="resources" value="file:#{jobParameters[filePath]}/*.dat"></property>
        <property name="delegate" ref="logItFileReader"></property>
    </bean>



    <batch:job id="remediationJob">
        <batch:step id="partitionedStep" >
            <batch:partition step="readWriteContactsPartitionedStep" partitioner="partitioner">
                <batch:handler task-executor="asyncTaskExecutor" />
            </batch:partition>
        </batch:step>
    </batch:job>

    <batch:step id="readWriteContactsPartitionedStep">
        <batch:tasklet>
            <batch:transaction-attributes isolation="READ_UNCOMMITTED"/>
            <batch:chunk reader="multiResourceReader" writer="rawItemDatabaseWriter" commit-interval="10" skip-policy="pdwUploadSkipPolicy"/>
        <batch:listeners>
                    <batch:listener ref="customItemReaderListener"></batch:listener>
                    <batch:listener ref="csvLineSkipListener"></batch:listener>
                    <batch:listener ref="getCurrentResourceChunkListener"></batch:listener>

                </batch:listeners>
        </batch:tasklet>    
    </batch:step>


    <bean id="logItFileReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
        <!-- Read a csv file -->

        <property name="strict" value="false"></property>
        <property name="lineMapper">
            <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
                <!-- split it -->
                <property name="lineTokenizer">
                    <bean
                        class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
                        <property name="delimiter" value="@##@" />
                        <property name="strict" value="true" />
                    </bean>
                </property>
                <property name="fieldSetMapper">
                    <!-- map to an object -->
                    <bean class="org.kp.oppr.remediation.batch.vo.CSVDataVOFieldMapper">

                    </bean>
                </property>
            </bean>
        </property>
    </bean>

    <bean id="rawItemDatabaseWriter" class="org.kp.oppr.remediation.batch.csv.RawItemDatabaseWriter"
        scope="step">
    </bean>

    <bean id="pdwUploadSkipPolicy"
        class="org.springframework.batch.core.step.skip.AlwaysSkipItemSkipPolicy" />

    <bean id="csvDataVO" class="org.kp.oppr.remediation.batch.vo.CSVDataVO"
        scope="prototype"></bean>


    <!-- BATCH LISTENERS -->

    <bean id="pdwFileMoverListener"
        class="org.kp.oppr.remediation.batch.listener.PdwFileMoverListener"
        scope="step">
    </bean>

    <bean id="csvLineSkipListener"
        class="org.kp.oppr.remediation.batch.listener.CSVLineSkipListener"
        scope="step">
    </bean>

    <bean id="customItemReaderListener"
        class="org.kp.oppr.remediation.batch.listener.CustomItemReaderListener"></bean>

     <bean id="getCurrentResourceChunkListener" 
          class="org.kp.oppr.remediation.batch.listener.GetCurrentResourceChunkListener">
        <property name="proxy" ref ="multiResourceReader" />
    </bean>
    <!-- 
    <bean id="stepListener" class="org.kp.oppr.remediation.batch.listener.ExampleStepExecutionListener">
        <property name="resources" ref="multiResourceReader"/>
    </bean>
     -->
    <!-- Skip Policies -->

</beans>  

我在这里缺少什么?

1 个答案:

答案 0 :(得分:0)

那么你有2个问题

1 - &#34;我希望一次完成一个文件的处理。而不是阅读所有记录,直到达到提交级别。&#34;  将Commit-Interval设置为1 - 它将读取一个项目,处理它,编写器将等到它有一个要写入的项目。

2 - 好像作业正在运行两次。

看起来它的运行次数与您拥有的文件数量相同。

此步骤不应使用MultiResourceItemReader。  分区程序将资源拆分为多个并创建单独的执行上下文。 由于设置了资源属性,MultiResourceItemReader再次考虑所有文件。