I am using a multi-threaded step while reading files from a resource folder. Say I have several files to process; multiple threads end up working on the same file, so I am not sure at which point in time a given file has been completely processed. Once a file has been processed successfully, I need to archive/delete it. Can someone guide me on what I should use?
Here is my sample code; at the very end I have also added a rough sketch of the direction I was considering.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.stream.Stream;
import javax.sql.DataSource;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecutionListener;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.core.configuration.annotation.BatchConfigurer;
import org.springframework.batch.core.configuration.annotation.DefaultBatchConfigurer;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.NonTransientResourceException;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.batch.support.transaction.ResourcelessTransactionManager;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
import org.springframework.core.task.TaskExecutor;
import com.iana.spring.batch.dao.GenericDAO;
import com.iana.spring.batch.listener.BatchJobCompletionListener;
@Configuration
public class BatchConfig {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    private JobLauncher jobLauncher;

    @Autowired
    private Job processJob;

    @Value("classpath*:/final/HYUMER_SI_*.txt")
    private Resource[] inputFiles;

    @Autowired
    @Qualifier("test2DataSource")
    private DataSource test2DataSource;
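
    // Persists one row per processed line into FILE_LOG via the project's GenericDAO helper.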
    public void saveFileLog(String fileLog) throws Exception {
        String query = "INSERT INTO FILE_LOG(LOG_INFO) VALUES (?)";
        new GenericDAO().saveOrUpdate(test2DataSource, query, false, fileLog);
    }
    // This job runs every 5 seconds
    //@Scheduled(fixedRate = 150000000)
    public void fixedRatedCallingMethod() {
        try {
            JobParameters jobParameters = new JobParametersBuilder()
                    .addLong("time", System.currentTimeMillis())
                    .toJobParameters();
            jobLauncher.run(processJob, jobParameters);
            System.out.println("I have been scheduled with Spring scheduler");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /* In case of a multiple-DataSource configuration, we need to add the following code.
     * - It is good practice to mark the Spring Batch database as @Primary to get all the default functionality
     *   implemented by Spring Batch statistics.
     * - All insert/update batch-job run statistics are then maintained by Spring Batch itself,
     *   with no extra code to write.
     * Error otherwise: "To use the default BatchConfigurer the context must contain no more than one DataSource, found 2"
     */
    @Bean
    BatchConfigurer configurer(@Qualifier("testDataSource") DataSource dataSource) {
        return new DefaultBatchConfigurer(dataSource);
    }
    @Bean
    public Job processJob() throws Exception {
        return jobBuilderFactory.get("processJob")
                .incrementer(new RunIdIncrementer())
                .listener(listener())
                .flow(orderStep1())
                .end()
                .build();
    }
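
    // SimpleAsyncTaskExecutor creates a new thread per task; the concurrency
    // limit below caps the number of threads running at the same time at 20.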
    @Bean
    public TaskExecutor taskExecutor() {
        SimpleAsyncTaskExecutor asyncTaskExecutor = new SimpleAsyncTaskExecutor("spring_batch");
        asyncTaskExecutor.setConcurrencyLimit(20);
        return asyncTaskExecutor;
    }
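
    /*
     * Custom reader: beforeStep loads every line of every matching input file
     * into an in-memory queue up front; read() is synchronized, so the worker
     * threads each take one line at a time until the queue is drained and
     * read() returns null to signal the end of input.
     */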
    @Bean
    public ItemReader<String> batchItemReader() {
        Queue<String> dataList = new LinkedList<String>();
        return new ItemReader<String>() {

            @BeforeStep
            public void beforeStep(StepExecution stepExecution) {
                System.err.println("in before step...");
                try {
                    if (inputFiles != null) {
                        for (int i = 0; i < inputFiles.length; i++) {
                            String fileName = inputFiles[i].getFile().getAbsolutePath();
                            try (Stream<String> stream = Files.lines(Paths.get(fileName))) {
                                stream.forEach(s -> dataList.add(s));
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
                System.out.println("fileList Size::" + dataList.size());
            }

            @Override
            public synchronized String read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {
                System.out.println("--> in item reader.........");
                String fileName = null;
                if (dataList.size() > 0) {
                    fileName = dataList.remove();
                    file_reading_cnt++;
                }
                return fileName;
            }

            @AfterStep
            public void afterStep(StepExecution stepExecution) {
                System.err.println("in after step..." + file_reading_cnt);
            }
        };
    }
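
    // Incremented only inside the synchronized read(), so volatile is enough
    // to make the final count visible to the thread that runs afterStep().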
    volatile int file_reading_cnt = 0;
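
    // Writer: each chunk of lines is written to the FILE_LOG table, one row per line.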
    @Bean
    public ItemWriter<String> batchItemWriter() {
        return new ItemWriter<String>() {
            @Override
            public void write(List<? extends String> fileList) throws Exception {
                System.out.println("----- in item writer.........");
                fileList.forEach(data -> {
                    try {
                        saveFileLog(data);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                });
            }
        };
    }
    /**
     * To create the step, the reader, processor and writer are passed in sequence.
     */
    @Bean
    public Step orderStep1() throws Exception {
        return stepBuilderFactory.get("orderStep1").<String, String>chunk(20)
                .reader(batchItemReader())
                .writer(batchItemWriter())
                .taskExecutor(taskExecutor())
                .throttleLimit(20)
                .build();
    }
    @Bean
    public JobExecutionListener listener() {
        return new BatchJobCompletionListener();
    }

    @Bean
    public ResourcelessTransactionManager transactionManager() {
        return new ResourcelessTransactionManager();
    }
}
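
For what it's worth, the direction I was considering is to give each file its own worker step via partitioning, and then archive the file from a StepExecutionListener once that step completes, since at that point the whole file is guaranteed to have been processed. This is only a rough, untested sketch of the idea: the "archive" sub-directory is a placeholder I made up, and my existing reader would have to become step-scoped and read only the partition's own file for this to work.

import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.core.partition.support.MultiResourcePartitioner;

    // Master step: MultiResourcePartitioner creates one partition per matching
    // file, so every file gets its own worker StepExecution.
    @Bean
    public Step masterStep() throws Exception {
        MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
        partitioner.setResources(inputFiles);
        return stepBuilderFactory.get("masterStep")
                .partitioner("workerStep", partitioner)
                .step(orderStep1())           // worker step; its reader would need to become @StepScope
                .taskExecutor(taskExecutor()) // partitions (i.e. files) run in parallel
                .build();
    }

    // Attached to the worker step: when one partition finishes successfully,
    // its file is fully processed and can be moved out of the input folder.
    public class ArchiveFileListener implements StepExecutionListener {

        @Override
        public void beforeStep(StepExecution stepExecution) {
            // nothing to do before the step starts
        }

        @Override
        public ExitStatus afterStep(StepExecution stepExecution) {
            if (ExitStatus.COMPLETED.getExitCode().equals(stepExecution.getExitStatus().getExitCode())) {
                try {
                    // MultiResourcePartitioner stores each partition's file URL under the key "fileName"
                    String fileUrl = stepExecution.getExecutionContext().getString("fileName");
                    Path source = Paths.get(new URI(fileUrl));
                    Path archiveDir = source.getParent().resolve("archive"); // placeholder, assumed to exist
                    Files.move(source, archiveDir.resolve(source.getFileName()));
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            return stepExecution.getExitStatus();
        }
    }

The listener would be registered on the worker step with .listener(new ArchiveFileListener()). Does something along these lines make sense, or is there a more standard approach?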