我有一个简单的Spring Batch,它从文件中读取100万条记录,然后将它打印在控制台上。
现在,我想在N台服务器上部署此批处理,比如N = 5.
如何确保所有服务器实例都没有读取相同的记录?
在 - 我如何适当地分割文件中的记录(100万/ 5)以实现优化结果?
请帮助解决代码示例。 谢谢。
答案 0 :(得分:0)
根据Michael的建议,您可以使用system命令拆分文件,然后使用MultiResourcePartitioner
并行处理拆分文件。
这就是我做的事情
@Bean
public Partitioner partitioner() {
MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
ClassLoader cl = this.getClass().getClassLoader();
ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(cl);
Resource[] resources = resolver.getResources("file:" + filePath + "/"+"*.csv");
partitioner.setResources(resources);
partitioner.partition(10);
return partitioner;
}
@Bean
public TaskExecutor taskExecutor() {
ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
taskExecutor.setMaxPoolSize(4);
taskExecutor.afterPropertiesSet();
return taskExecutor;
}
@Bean
@Qualifier("masterStep")
public Step masterStep() {
return stepBuilderFactory.get("masterStep")
.partitioner(ProcessDataStep())
.partitioner("ProcessDataStep",partitioner())
.taskExecutor(taskExecutor())
.listener(pcStressStepListener)
.build();
}
@Bean
@Qualifier("processData")
public Step processData() {
return stepBuilderFactory.get("processData")
.<pojo, pojo> chunk(5000)
.reader(reader)
.processor(processor())
.writer(writer)
.build();
}
@Bean(name="reader")
@StepScope
public FlatFileItemReader<pojo> reader(@Value("#{stepExecutionContext['fileName']}") String filename) {
FlatFileItemReader<pojo> reader = new FlatFileItemReader<>();
reader.setResource(new UrlResource(filename));
reader.setLineMapper(new DefaultLineMapper<pojo>() {
{
setLineTokenizer(new DelimitedLineTokenizer() {
{
setNames(FILE HEADER);
}
});
setFieldSetMapper(new BeanWrapperFieldSetMapper<pojo>() {
{
setTargetType(pojo.class);
}
});
}
});
return reader;
}