I am using a multi-threaded step while reading files from a resource folder. Say I have several files to process; multiple threads end up working on the same file, so I am not sure at which point in time a given file has been completely processed. Once a file has been processed successfully, I need to archive/delete it. Can someone guide me on what I should use?
Here is my sample code; at the very end I have also added a rough sketch of the direction I was considering.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.stream.Stream;
import javax.sql.DataSource;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecutionListener;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.core.configuration.annotation.BatchConfigurer;
import org.springframework.batch.core.configuration.annotation.DefaultBatchConfigurer;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.NonTransientResourceException;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.batch.support.transaction.ResourcelessTransactionManager;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
import org.springframework.core.task.TaskExecutor;
import com.iana.spring.batch.dao.GenericDAO;
import com.iana.spring.batch.listener.BatchJobCompletionListener;
@Configuration
public class BatchConfig {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    private JobLauncher jobLauncher;

    @Autowired
    private Job processJob;

    @Value("classpath*:/final/HYUMER_SI_*.txt")
    private Resource[] inputFiles;

    @Autowired
    @Qualifier("test2DataSource")
    private DataSource test2DataSource;
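
    // Persists one row per processed line into FILE_LOG via the project's GenericDAO helper.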
    public void saveFileLog(String fileLog) throws Exception {
        String query = "INSERT INTO FILE_LOG(LOG_INFO) VALUES (?)";
        new GenericDAO().saveOrUpdate(test2DataSource, query, false, fileLog);
    }
    // This job runs every 5 seconds
    //@Scheduled(fixedRate = 150000000)
    public void fixedRatedCallingMethod() {
        try {
            JobParameters jobParameters = new JobParametersBuilder()
                    .addLong("time", System.currentTimeMillis())
                    .toJobParameters();
            jobLauncher.run(processJob, jobParameters);
            System.out.println("I have been scheduled with Spring scheduler");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /* In case of a multiple-DataSource configuration, we need to add the following code.
     * - It is good practice to mark the Spring Batch database as @Primary to get all the default functionality
     *   implemented by Spring Batch statistics.
     * - All insert/update batch-job run statistics are then maintained by Spring Batch itself,
     *   with no extra code to write.
     * Error otherwise: "To use the default BatchConfigurer the context must contain no more than one DataSource, found 2"
     */
    @Bean
    BatchConfigurer configurer(@Qualifier("testDataSource") DataSource dataSource) {
        return new DefaultBatchConfigurer(dataSource);
    }
    @Bean
    public Job processJob() throws Exception {
        return jobBuilderFactory.get("processJob")
                .incrementer(new RunIdIncrementer())
                .listener(listener())
                .flow(orderStep1())
                .end()
                .build();
    }
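
    // SimpleAsyncTaskExecutor creates a new thread per task; the concurrency
    // limit below caps the number of threads running at the same time at 20.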
    @Bean
    public TaskExecutor taskExecutor() {
        SimpleAsyncTaskExecutor asyncTaskExecutor = new SimpleAsyncTaskExecutor("spring_batch");
        asyncTaskExecutor.setConcurrencyLimit(20);
        return asyncTaskExecutor;
    }
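
    /*
     * Custom reader: beforeStep loads every line of every matching input file
     * into an in-memory queue up front; read() is synchronized, so the worker
     * threads each take one line at a time until the queue is drained and
     * read() returns null to signal the end of input.
     */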
    @Bean
    public ItemReader<String> batchItemReader() {
        Queue<String> dataList = new LinkedList<String>();
        return new ItemReader<String>() {

            @BeforeStep
            public void beforeStep(StepExecution stepExecution) {
                System.err.println("in before step...");
                try {
                    if (inputFiles != null) {
                        for (int i = 0; i < inputFiles.length; i++) {
                            String fileName = inputFiles[i].getFile().getAbsolutePath();
                            try (Stream<String> stream = Files.lines(Paths.get(fileName))) {
                                stream.forEach(s -> dataList.add(s));
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
                System.out.println("fileList Size::" + dataList.size());
            }

            @Override
            public synchronized String read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {
                System.out.println("--> in item reader.........");
                String fileName = null;
                if (dataList.size() > 0) {
                    fileName = dataList.remove();
                    file_reading_cnt++;
                }
                return fileName;
            }

            @AfterStep
            public void afterStep(StepExecution stepExecution) {
                System.err.println("in after step..." + file_reading_cnt);
            }
        };
    }
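
    // Incremented only inside the synchronized read(), so volatile is enough
    // to make the final count visible to the thread that runs afterStep().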
    volatile int file_reading_cnt = 0;
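
    // Writer: each chunk of lines is written to the FILE_LOG table, one row per line.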
    @Bean
    public ItemWriter<String> batchItemWriter() {
        return new ItemWriter<String>() {
            @Override
            public void write(List<? extends String> fileList) throws Exception {
                System.out.println("----- in item writer.........");
                fileList.forEach(data -> {
                    try {
                        saveFileLog(data);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                });
            }
        };
    }
    /**
     * To create the step, the reader, processor and writer are passed in sequence.
     */
    @Bean
    public Step orderStep1() throws Exception {
        return stepBuilderFactory.get("orderStep1").<String, String>chunk(20)
                .reader(batchItemReader())
                .writer(batchItemWriter())
                .taskExecutor(taskExecutor())
                .throttleLimit(20)
                .build();
    }
    @Bean
    public JobExecutionListener listener() {
        return new BatchJobCompletionListener();
    }

    @Bean
    public ResourcelessTransactionManager transactionManager() {
        return new ResourcelessTransactionManager();
    }
}
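
For what it's worth, the direction I was considering is to give each file its own worker step via partitioning, and then archive the file from a StepExecutionListener once that step completes, since at that point the whole file is guaranteed to have been processed. This is only a rough, untested sketch of the idea: the "archive" sub-directory is a placeholder I made up, and my existing reader would have to become step-scoped and read only the partition's own file for this to work.

import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.core.partition.support.MultiResourcePartitioner;

    // Master step: MultiResourcePartitioner creates one partition per matching
    // file, so every file gets its own worker StepExecution.
    @Bean
    public Step masterStep() throws Exception {
        MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
        partitioner.setResources(inputFiles);
        return stepBuilderFactory.get("masterStep")
                .partitioner("workerStep", partitioner)
                .step(orderStep1())           // worker step; its reader would need to become @StepScope
                .taskExecutor(taskExecutor()) // partitions (i.e. files) run in parallel
                .build();
    }

    // Attached to the worker step: when one partition finishes successfully,
    // its file is fully processed and can be moved out of the input folder.
    public class ArchiveFileListener implements StepExecutionListener {

        @Override
        public void beforeStep(StepExecution stepExecution) {
            // nothing to do before the step starts
        }

        @Override
        public ExitStatus afterStep(StepExecution stepExecution) {
            if (ExitStatus.COMPLETED.getExitCode().equals(stepExecution.getExitStatus().getExitCode())) {
                try {
                    // MultiResourcePartitioner stores each partition's file URL under the key "fileName"
                    String fileUrl = stepExecution.getExecutionContext().getString("fileName");
                    Path source = Paths.get(new URI(fileUrl));
                    Path archiveDir = source.getParent().resolve("archive"); // placeholder, assumed to exist
                    Files.move(source, archiveDir.resolve(source.getFileName()));
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            return stepExecution.getExitStatus();
        }
    }

The listener would be registered on the worker step with .listener(new ArchiveFileListener()). Does something along these lines make sense, or is there a more standard approach?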