如何使用 Spring Batch 分区,让分区步骤在每台服务器上各执行且仅执行一次?

时间:2014-06-16 05:26:29

标签: spring spring-batch spring-integration

我正在使用 Spring Batch 分区。我从文件中读取交换(exchange)列表,并对每个交换做一些处理。

这些交换通过 Spring Batch 分区分布到 4 台服务器上并行处理。

第一个步骤负责生成包含交换 ID 的输入文件,而每台服务器都需要读取这些 ID。

有没有办法让该步骤在每台服务器上各运行一次,从而在所有服务器上都准备好输入文件?

我尝试过把 grid size 设为 4(服务器数量)、消费者并发数设为 1,这样每台服务器上应该只有 1 个消费者监听步骤执行请求。

问题是,同一个消费者可能处理不止 1 个请求,导致该步骤在某些服务器上运行了多次,而在另一些服务器上根本没有运行。结果是部分服务器上没有准备好数据,后续步骤也随之失败。

如何确保该步骤在每台服务器上恰好运行一次?

以下是配置

导入作业(importJob)的第一步是 prepareExchangesListJob,它需要按上述方式运行;第二步是 importExchangesJob,这是一个普通的分区作业。在 importExchanges 之后还有许多步骤,它们都是普通的分区步骤。

<!-- Parent job: runs the nested prepareExchangesListJob, then the nested importExchangesJob. -->
<job id="importJob">
    <!-- First step: delegates to prepareExchangesListJob, then moves on to import.importExchangesStep. -->
    <step id="import.prepareExchangesListStep"
          next="import.importExchangesStep">
        <job ref="prepareExchangesListJob" />
    </step>
    <!-- Second step: delegates to importExchangesJob and attaches importExchangesStepNotifier as a listener. -->
    <step id="import.importExchangesStep">
        <job ref="importExchangesJob" />
        <listeners>
            <listener ref="importExchangesStepNotifier" />
        </listeners>
    </step>
</job>

prepareExchangesListJob 的配置:请注意网格大小(grid size)= 4(服务器数量)、消费者并发数 = 1,目的是让该步骤在每台服务器上只执行一次,从而在所有服务器上准备好输入数据(交换列表)。

<!-- RabbitTemplate used by the outbound gateway below: publishes with routing key
     prepareExchangesListQueue and takes its reply timeout from
     prepare.exchanges.list.step.timeout. -->
<rabbit:template id="prepareExchangesListAmqpTemplate"
                 connection-factory="rabbitConnectionFactory"
                 routing-key="prepareExchangesListQueue"
                 reply-timeout="${prepare.exchanges.list.step.timeout}" />

<!-- Request channel feeding the outbound gateway; dispatching is handed to taskExecutor. -->
<int:channel id="prepareExchangesListOutboundChannel">
    <int:dispatcher task-executor="taskExecutor" />
</int:channel>

<!-- Channel that receives the gateway replies before they reach the aggregator. -->
<int:channel id="prepareExchangesListInboundStagingChannel" />

<!-- Sends each request over AMQP through the template above and publishes the reply on the
     staging channel. correlationId / sequenceNumber / sequenceSize are mapped on both the
     request and the reply. -->
<amqp:outbound-gateway amqp-template="prepareExchangesListAmqpTemplate"
                       request-channel="prepareExchangesListOutboundChannel"
                       reply-channel="prepareExchangesListInboundStagingChannel"
                       mapped-request-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS"
                       mapped-reply-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS" />


<!-- MessagingTemplate given to the partition handler: its default channel is the outbound
     channel and its receive timeout comes from prepare.exchanges.list.step.timeout. -->
<beans:bean id="prepareExchangesListMessagingTemplate"
            class="org.springframework.integration.core.MessagingTemplate"
            p:defaultChannel-ref="prepareExchangesListOutboundChannel"
            p:receiveTimeout="${prepare.exchanges.list.step.timeout}" />


<!-- Step-scoped SimplePartitioner; no properties are configured on it here. -->
<beans:bean id="prepareExchangesListPartitioner"
            class="org.springframework.batch.core.partition.support.SimplePartitioner"
            scope="step" />


<!-- Partition handler targeting the step named prepareExchangesListStep; the grid size is
     read from prepare.exchanges.list.grid.size. -->
<beans:bean id="prepareExchangesListPartitionHandler"
            class="org.springframework.batch.integration.partition.MessageChannelPartitionHandler"
            p:stepName="prepareExchangesListStep"
            p:gridSize="${prepare.exchanges.list.grid.size}"
            p:messagingOperations-ref="prepareExchangesListMessagingTemplate" />

<!-- Aggregator on the reply staging channel that delegates to the partition handler bean;
     partial results are sent on expiry. -->
<int:aggregator ref="prepareExchangesListPartitionHandler"
                input-channel="prepareExchangesListInboundStagingChannel"
                send-partial-result-on-expiry="true"
                send-timeout="${prepare.exchanges.list.step.timeout}" />

<!-- Consumer side: one concurrent consumer reads prepareExchangesListQueue, forwards each
     request to the inbound channel and returns whatever arrives on the outbound staging
     channel as the reply. -->
<amqp:inbound-gateway connection-factory="rabbitConnectionFactory"
                      queue-names="prepareExchangesListQueue"
                      concurrent-consumers="1"
                      request-channel="prepareExchangesListInboundChannel"
                      reply-channel="prepareExchangesListOutboundStagingChannel"
                      mapped-request-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS"
                      mapped-reply-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS" />


<!-- Channel carrying incoming requests to the service activator. -->
<int:channel id="prepareExchangesListInboundChannel" />

<!-- Hands each incoming request to the stepExecutionRequestHandler bean and routes its
     result to the outbound staging channel. -->
<int:service-activator ref="stepExecutionRequestHandler"
                       input-channel="prepareExchangesListInboundChannel"
                       output-channel="prepareExchangesListOutboundStagingChannel" />

<!-- Channel carrying the handler result back to the inbound gateway. -->
<int:channel id="prepareExchangesListOutboundStagingChannel" />

<!-- Step-scoped flat-file reader over classpath:primary_markets.txt, using stLineMapper. -->
<beans:bean id="prepareExchangesFileItemReader"
            class="org.springframework.batch.item.file.FlatFileItemReader"
            scope="step"
            p:resource="classpath:primary_markets.txt"
            p:lineMapper-ref="stLineMapper" />


<!-- Step-scoped project writer. dirPath is built from spring.tmp.batch.dir plus the batch_id
     job parameter; numberOfFiles is hard-coded to 4 in this snippet. -->
<beans:bean id="prepareExchangesItemWriter"
            class="com.st.batch.foundation.writers.PrepareExchangesItemWriter"
            scope="step"
            p:dirPath="${spring.tmp.batch.dir}/#{jobParameters[batch_id]}"
            p:numberOfFiles="4"
            p:symfony-ref="symfonyStepScoped" />


<!-- Chunk-oriented step: reads with prepareExchangesFileItemReader and writes with
     prepareExchangesItemWriter; the commit interval comes from prepare.exchanges.commit.interval. -->
<step id="prepareExchangesListStep">
    <tasklet transaction-manager="transactionManager">
        <chunk reader="prepareExchangesFileItemReader"
               writer="prepareExchangesItemWriter"
               commit-interval="${prepare.exchanges.commit.interval}" />
    </tasklet>
</step>

<!-- Restartable job with a single partitioned step, wired to prepareExchangesListPartitioner
     and prepareExchangesListPartitionHandler. -->
<job id="prepareExchangesListJob" restartable="true">
    <step id="prepareExchangesListStep.master">
        <partition handler="prepareExchangesListPartitionHandler"
                   partitioner="prepareExchangesListPartitioner" />
    </step>
</job>

importExchangesJob(导入交换作业)的配置

<!-- RabbitTemplate used by the outbound gateway below: publishes with routing key
     importExchangesQueue and takes its reply timeout from import.exchanges.partition.timeout. -->
<rabbit:template id="importExchangesAmqpTemplate"
                 connection-factory="rabbitConnectionFactory"
                 routing-key="importExchangesQueue"
                 reply-timeout="${import.exchanges.partition.timeout}" />

<!-- Request channel feeding the outbound gateway; dispatching is handed to taskExecutor. -->
<int:channel id="importExchangesOutboundChannel">
    <int:dispatcher task-executor="taskExecutor" />
</int:channel>

<!-- Channel that receives the gateway replies before they reach the aggregator. -->
<int:channel id="importExchangesInboundStagingChannel" />

<!-- Sends each request over AMQP through the template above and publishes the reply on the
     staging channel. correlationId / sequenceNumber / sequenceSize are mapped on both the
     request and the reply. -->
<amqp:outbound-gateway amqp-template="importExchangesAmqpTemplate"
                       request-channel="importExchangesOutboundChannel"
                       reply-channel="importExchangesInboundStagingChannel"
                       mapped-request-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS"
                       mapped-reply-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS" />


<!-- MessagingTemplate given to the partition handler: its default channel is the outbound
     channel and its receive timeout comes from import.exchanges.partition.timeout. -->
<beans:bean id="importExchangesMessagingTemplate"
            class="org.springframework.integration.core.MessagingTemplate"
            p:defaultChannel-ref="importExchangesOutboundChannel"
            p:receiveTimeout="${import.exchanges.partition.timeout}" />


<!-- Partition handler targeting the step named importExchangesStep; the grid size is read
     from import.exchanges.grid.size. -->
<beans:bean id="importExchangesPartitionHandler"
            class="org.springframework.batch.integration.partition.MessageChannelPartitionHandler"
            p:stepName="importExchangesStep"
            p:gridSize="${import.exchanges.grid.size}"
            p:messagingOperations-ref="importExchangesMessagingTemplate" />

<!-- Aggregator on the reply staging channel that delegates to the partition handler bean;
     partial results are sent on expiry.
     NOTE(review): send-timeout reads import.exchanges.step.timeout, whereas the template and
     messaging template read import.exchanges.partition.timeout; confirm this is intentional. -->
<int:aggregator ref="importExchangesPartitionHandler"
                input-channel="importExchangesInboundStagingChannel"
                send-partial-result-on-expiry="true"
                send-timeout="${import.exchanges.step.timeout}" />

<!-- Consumer side: reads importExchangesQueue with the number of concurrent consumers given
     by import.exchanges.consumer.concurrency, forwards each request to the inbound channel
     and returns whatever arrives on the outbound staging channel as the reply. -->
<amqp:inbound-gateway connection-factory="rabbitConnectionFactory"
                      queue-names="importExchangesQueue"
                      concurrent-consumers="${import.exchanges.consumer.concurrency}"
                      request-channel="importExchangesInboundChannel"
                      reply-channel="importExchangesOutboundStagingChannel"
                      mapped-request-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS"
                      mapped-reply-headers="correlationId, sequenceNumber, sequenceSize, STANDARD_REQUEST_HEADERS" />


<!-- Channel carrying incoming requests to the service activator. -->
<int:channel id="importExchangesInboundChannel" />

<!-- Hands each incoming request to the stepExecutionRequestHandler bean and routes its
     result to the outbound staging channel. -->
<int:service-activator ref="stepExecutionRequestHandler"
                       input-channel="importExchangesInboundChannel"
                       output-channel="importExchangesOutboundStagingChannel" />

<!-- Channel carrying the handler result back to the inbound gateway. -->
<int:channel id="importExchangesOutboundStagingChannel" />


<!-- Step-scoped project writer; its timeout comes from import.exchanges.item.timeout. -->
<beans:bean id="importExchangesItemWriter"
            class="com.st.batch.foundation.writers.ImportExchangesAndEclsItemWriter"
            scope="step"
            p:symfony-ref="symfonyStepScoped"
            p:timeout="${import.exchanges.item.timeout}" />

<!-- Step-scoped MultiResourcePartitioner over the exchanges_*.txt files found under
     <spring.tmp.batch.dir>/<batch_id job parameter>/exchanges/. -->
<beans:bean id="importExchangesPartitioner"
            class="org.springframework.batch.core.partition.support.MultiResourcePartitioner"
            scope="step"
            p:resources="file:${spring.tmp.batch.dir}/#{jobParameters[batch_id]}/exchanges/exchanges_*.txt" />

<!-- Step-scoped flat-file reader; the resource is taken from the fileName entry of the step
     execution context and lines are mapped with stLineMapper. -->
<beans:bean id="importExchangesFileItemReader"
            class="org.springframework.batch.item.file.FlatFileItemReader"
            scope="step"
            p:resource="#{stepExecutionContext[fileName]}"
            p:lineMapper-ref="stLineMapper" />

<!-- Chunk-oriented step: reads with importExchangesFileItemReader and writes with
     importExchangesItemWriter; the commit interval comes from import.exchanges.commit.interval. -->
<step id="importExchangesStep">
    <tasklet transaction-manager="transactionManager">
        <chunk reader="importExchangesFileItemReader"
               writer="importExchangesItemWriter"
               commit-interval="${import.exchanges.commit.interval}" />
    </tasklet>
</step>

<!-- Restartable job with a single partitioned step, wired to importExchangesPartitioner and
     importExchangesPartitionHandler. -->
<job id="importExchangesJob" restartable="true">
    <step id="importExchangesStep.master">
        <partition handler="importExchangesPartitionHandler"
                   partitioner="importExchangesPartitioner" />
    </step>
</job>

1 个答案:

答案 0 :(得分:0)

有趣的技巧。

我原本预期这四个分区会均匀分布;据我所知(AFAIK),RabbitMQ 通常会以轮询(round-robin)方式向竞争消费者分发消息。所以我不确定你为什么没有看到这种行为。

你可以花些时间去查明原因,但依赖这种行为本身就很脆弱:如果其中一个从节点(slave)出现网络故障,它的分区就会被分配给其他从节点。更好的做法是让每个从节点绑定到各自不同的队列,并在(第一个)出站网关上添加路由键表达式(routing-key-expression)来显式路由各个分区……

routing-key-expression="'foo.' + headers['sequenceNumber']"

让各个从节点分别监听 foo.1、foo.2 等队列,第二个步骤则继续使用公共队列。

这里假设你使用的是默认交换器(""),并按队列名称路由;如果你有显式绑定,可以在路由键表达式中使用相应的绑定键。

PS:提醒一下,如果分区的执行时间超过默认的 5 秒,就需要调大 RabbitTemplate 的 reply-timeout。