Spring Batch job with FlatFileItemReader hangs when a big CSV file is used

Date: 2016-04-07 10:48:33

Tags: java spring spring-batch

I am triggering a Spring Batch job from my web application. The CSV file to be processed is chosen by the user. I am using FlatFileItemReader as the reader. My reader configuration is below:

    @Bean
    @Scope("job")
    public CustomFlatFileItemReader<ProducerMessage> csvReader(@Value("#{jobParameters[fileName]}") String fileName, @Value("#{jobParameters[s3SourceFolderPrefix]}") String s3SourceFolderPrefix, @Value("#{jobParameters[timeStamp]}") long timeStamp, ConfigurationService mongoConfigurationService) {
        CustomFlatFileItemReader<ProducerMessage> flatFileReader = new CustomFlatFileItemReader<ProducerMessage>();
        DefaultLineMapper<ProducerMessage> lineMapper = new DefaultLineMapper<ProducerMessage>();
        lineMapper.setLineTokenizer(new DelimitedLineTokenizer());
        CSVFieldMapper csvFieldMapper = new CSVFieldMapper(fileName, s3SourceFolderPrefix, timeStamp, mongoConfigurationService);
        lineMapper.setFieldSetMapper(csvFieldMapper);
        flatFileReader.setLineMapper(lineMapper);
        flatFileReader.setResource(new FileSystemResource(fileName));
        return flatFileReader;
    }
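
CSVFieldMapper is my FieldSetMapper implementation. In outline it looks like the sketch below (simplified; the actual mapping of the individual CSV columns to ProducerMessage fields is omitted here):

    public class CSVFieldMapper implements FieldSetMapper<ProducerMessage> {

        private final String fileName;
        private final String s3SourceFolderPrefix;
        private final long timeStamp;
        private final ConfigurationService mongoConfigurationService;

        public CSVFieldMapper(String fileName, String s3SourceFolderPrefix, long timeStamp,
                              ConfigurationService mongoConfigurationService) {
            this.fileName = fileName;
            this.s3SourceFolderPrefix = s3SourceFolderPrefix;
            this.timeStamp = timeStamp;
            this.mongoConfigurationService = mongoConfigurationService;
        }

        @Override
        public ProducerMessage mapFieldSet(FieldSet fieldSet) {
            // Columns are read by position because the DelimitedLineTokenizer is used without column names.
            ProducerMessage message = new ProducerMessage();
            // ... populate message from fieldSet, fileName, s3SourceFolderPrefix and timeStamp ...
            // A CSVFieldMappingException is thrown when a value cannot be converted.
            return message;
        }
    }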

I am setting the resource as a FileSystemResource. Does this load the whole file into memory? When the CSV file is large (more than 1 GB), the step execution goes into a never-ending loop. I can see the job in the job repository with STARTED status. Please help.
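
For context, the job is triggered from the web layer roughly like this (a simplified sketch; jobLauncher and csvImportJob stand in for the actual beans, and the surrounding controller code is omitted):

    JobParameters jobParameters = new JobParametersBuilder()
            .addString("fileName", fileName)                         // path of the CSV file requested by the user
            .addString("s3SourceFolderPrefix", s3SourceFolderPrefix)
            .addLong("timeStamp", System.currentTimeMillis())
            .toJobParameters();

    // jobLauncher is the Spring Batch JobLauncher, csvImportJob the Job built around csvFileStep
    JobExecution execution = jobLauncher.run(csvImportJob, jobParameters);
    LOGGER.info("Started job with status {}", execution.getStatus());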

My step configuration is:

 @Bean(name = "csvFileStep")
    public Step csvFileStep(StepBuilderFactory stepBuilderFactory,
                               ItemReader csvReader, ItemWriter writer,
                               @Value("${reader.chunkSize}")
                               int chunkSize) {
        LOGGER.info("Step configuration loaded with chunk size {}", chunkSize);
        return stepBuilderFactory.get("step1")
                .chunk(chunkSize).reader(csvReader)
                .faultTolerant()
                .skip(CSVFieldMappingException.class)
                        //Setting the skip limit to maxValue of Int so as to skip as many mapping exceptions.
                .skipLimit(Int.MaxValue())
                .writer(writer)
                .build();
    }
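
The step is wired into the job in the usual way; roughly like this (the job name csvImportJob is a placeholder and listeners are omitted):

    @Bean
    public Job csvImportJob(JobBuilderFactory jobBuilderFactory,
                            @Qualifier("csvFileStep") Step csvFileStep) {
        return jobBuilderFactory.get("csvImportJob")
                .start(csvFileStep)
                .build();
    }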

My writer class:

package com.fastretailing.catalogPlatformSCMProducer.producerjob.writer.rds;

import com.fastretailing.catalogPlatformSCMProducer.model.Configuration;
import com.fastretailing.catalogPlatformSCMProducer.model.NotificationBean;
import com.fastretailing.catalogPlatformSCMProducer.model.ProducerMessage;
import com.fastretailing.catalogPlatformSCMProducer.notification.JobStatus;
import com.fastretailing.catalogPlatformSCMProducer.util.ProducerUtil;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.configuration.annotation.JobScope;
import org.springframework.batch.item.support.AbstractItemStreamItemWriter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.dao.DataAccessException;
import org.springframework.dao.EmptyResultDataAccessException;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.transaction.annotation.Transactional;

import java.util.List;

/**
 * Producer Job writer for writing directly to RDS.
 */
public class RdsWriter extends AbstractItemStreamItemWriter<ProducerMessage> {

    @Autowired
    @Qualifier("rdsJdbcTemplate")
    JdbcTemplate rdsJdbcTemplate;

    @Autowired
    Configuration configuration;

    private final Logger LOGGER = LoggerFactory.getLogger(this.getClass());

    @Override
    public void write(List<? extends ProducerMessage> list) throws Exception {
        for (ProducerMessage message : list) {
            handleRecord(message);
        }
    }

    @Transactional
    public void handleRecord(ProducerMessage message) {
        try {
            String pkCheckQuery = ProducerUtil.generatePrimaryKeyCheckQuery(message);
            Long returnValue = null;
            try {
                returnValue = rdsJdbcTemplate.queryForObject(pkCheckQuery, Long.class);
            } catch (EmptyResultDataAccessException e) {
                LOGGER.debug("Primary key not exists in RDS table. This will insert new row");
            }

            if(null == returnValue || returnValue < message.getTimeStamp()){
                String query = ProducerUtil.getRdsWriteQuery(message);
                LOGGER.debug("Executing Query  {}", query);
                rdsJdbcTemplate.update(query);
                JobStatus.addRowsWritten(1);
            }else{
                JobStatus.addRowsSkippedWriting(1);
                LOGGER.debug("Skipped row due to timestamp check failure for feedName {}", message.getFeedConfigName());
            }
        } catch (Exception e) {
            JobStatus.changeStatus(ProducerUtil.SNS_NOTIFICATION_EVENT_IN_COMPLETE);
            JobStatus.addRowsSkippedWriting(1);
            JobStatus.addExceptionInLogWriter(ExceptionUtils.getStackTrace(e));
            LOGGER.error("Exception while writing records to RDS table. These records will be skipped from writing.", e);
        }
    }
}
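
The writer is declared as a bean in the same job configuration so that its @Autowired fields are populated; a minimal sketch (the bean name matches the ItemWriter injected into csvFileStep above):

    @Bean
    public RdsWriter writer() {
        // rdsJdbcTemplate and configuration are injected into the RdsWriter fields by Spring
        return new RdsWriter();
    }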

My custom reader (this is the same as Spring's FlatFileItemReader, apart from the custom exception handling in the doRead method):

    /*
     * Copyright 2006-2007 the original author or authors.
     *
     * Licensed under the Apache License, Version 2.0 (the "License");
     * you may not use this file except in compliance with the License.
     * You may obtain a copy of the License at
     *
     *      http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    package com.fastretailing.catalogPlatformSCMProducer.producerjob.reader;

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.nio.charset.Charset;

    import com.fastretailing.catalogPlatformSCMProducer.exception.CSVFieldMappingException;
    import com.fastretailing.catalogPlatformSCMProducer.notification.JobStatus;
    import org.apache.commons.lang3.exception.ExceptionUtils;
    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.springframework.batch.item.ItemReader;
    import org.springframework.batch.item.ReaderNotOpenException;
    import org.springframework.batch.item.file.*;
    import org.springframework.batch.item.file.separator.RecordSeparatorPolicy;
    import org.springframework.batch.item.file.separator.SimpleRecordSeparatorPolicy;
    import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader;
    import org.springframework.beans.factory.InitializingBean;
    import org.springframework.core.io.Resource;
    import org.springframework.util.Assert;
    import org.springframework.util.ClassUtils;
    import org.springframework.util.StringUtils;

    /**
     * Reusing Spring's FlatFileItemReader from https://github.com/spring-projects/spring-batch/blob/master/spring-batch-infrastructure/src/main/java/org/springframework/batch/item/file/FlatFileItemReader.java
     * to change the exception handling policy in case of mapping exceptions.
     *
     */
    public class CustomFlatFileItemReader<T> extends AbstractItemCountingItemStreamItemReader<T> implements
            ResourceAwareItemReaderItemStream<T>, InitializingBean {

        private static final Log logger = LogFactory.getLog(CustomFlatFileItemReader.class);

        // default encoding for input files
        public static final String DEFAULT_CHARSET = Charset.defaultCharset().name();

        private RecordSeparatorPolicy recordSeparatorPolicy = new SimpleRecordSeparatorPolicy();

        private Resource resource;

        private BufferedReader reader;

        private int lineCount = 0;

        private String[] comments = new String[] { "#" };

        private boolean noInput = false;

        private String encoding = DEFAULT_CHARSET;

        private LineMapper<T> lineMapper;

        private int linesToSkip = 0;

        private LineCallbackHandler skippedLinesCallback;

        private boolean strict = true;

        private BufferedReaderFactory bufferedReaderFactory = new DefaultBufferedReaderFactory();

        public CustomFlatFileItemReader() {
            setName(ClassUtils.getShortName(CustomFlatFileItemReader.class));
        }

        /**
         * In strict mode the reader will throw an exception on
         * {@link #open(org.springframework.batch.item.ExecutionContext)} if the input resource does not exist.
         * @param strict <code>true</code> by default
         */
        public void setStrict(boolean strict) {
            this.strict = strict;
        }

        /**
         * @param skippedLinesCallback will be called for each one of the initial skipped lines before any items are read.
         */
        public void setSkippedLinesCallback(LineCallbackHandler skippedLinesCallback) {
            this.skippedLinesCallback = skippedLinesCallback;
        }

        /**
         * Public setter for the number of lines to skip at the start of a file. Can be used if the file contains a header
         * without useful (column name) information, and without a comment delimiter at the beginning of the lines.
         *
         * @param linesToSkip the number of lines to skip
         */
        public void setLinesToSkip(int linesToSkip) {
            this.linesToSkip = linesToSkip;
        }

        /**
         * Setter for line mapper. This property is required to be set.
         * @param lineMapper maps line to item
         */
        public void setLineMapper(LineMapper<T> lineMapper) {
            this.lineMapper = lineMapper;
        }

        /**
         * Setter for the encoding for this input source. Default value is {@link #DEFAULT_CHARSET}.
         *
         * @param encoding a properties object which possibly contains the encoding for this input file;
         */
        public void setEncoding(String encoding) {
            this.encoding = encoding;
        }

        /**
         * Factory for the {@link BufferedReader} that will be used to extract lines from the file. The default is fine for
         * plain text files, but this is a useful strategy for binary files where the standard BufferedReader from java.io
         * is limiting.
         *
         * @param bufferedReaderFactory the bufferedReaderFactory to set
         */
        public void setBufferedReaderFactory(BufferedReaderFactory bufferedReaderFactory) {
            this.bufferedReaderFactory = bufferedReaderFactory;
        }

        /**
         * Setter for comment prefixes. Can be used to ignore header lines as well by using e.g. the first couple of column
         * names as a prefix.
         *
         * @param comments an array of comment line prefixes.
         */
        public void setComments(String[] comments) {
            this.comments = new String[comments.length];
            System.arraycopy(comments, 0, this.comments, 0, comments.length);
        }

        /**
         * Public setter for the input resource.
         */
        @Override
        public void setResource(Resource resource) {
            this.resource = resource;
        }

        /**
         * Public setter for the recordSeparatorPolicy. Used to determine where the line endings are and do things like
         * continue over a line ending if inside a quoted string.
         *
         * @param recordSeparatorPolicy the recordSeparatorPolicy to set
         */
        public void setRecordSeparatorPolicy(RecordSeparatorPolicy recordSeparatorPolicy) {
            this.recordSeparatorPolicy = recordSeparatorPolicy;
        }

        /**
         * @return string corresponding to logical record according to
         * {@link #setRecordSeparatorPolicy(RecordSeparatorPolicy)} (might span multiple lines in file).
         */
        @Override
        protected T doRead() throws Exception {
            if (noInput) {
                return null;
            }

            String line = readLine();

            if (line == null) {
                return null;
            }
            else {
                try {
                    return lineMapper.mapLine(line, lineCount);
                }catch (CSVFieldMappingException e){
                    String message = "Parsing error at line: " + lineCount + " in resource=["
                            + resource.getDescription() + "], input=[" + line + "]";
                    logger.error(message,e);
                    Exception customException = new CSVFieldMappingException(message, e);
                    JobStatus.addExceptionInLogWriter(ExceptionUtils.getStackTrace(customException));
                    throw customException;
                }
                catch (Exception ex) {
                    Exception exNew =  new FlatFileParseException("Parsing error at line: " + lineCount + " in resource=["
                            + resource.getDescription() + "], input=[" + line + "]", ex, line, lineCount);
                    JobStatus.addExceptionInLogWriter(ExceptionUtils.getStackTrace(exNew));
                    throw exNew;
                }
            }
        }

        /**
         * @return next line (skip comments).
         */
        private String readLine() {

            if (reader == null) {
                throw new ReaderNotOpenException("Reader must be open before it can be read.");
            }

            String line = null;

            try {
                line = this.reader.readLine();
                if (line == null) {
                    return null;
                }
                lineCount++;
                while (isComment(line)) {
                    line = reader.readLine();
                    if (line == null) {
                        return null;
                    }
                    lineCount++;
                }

                line = applyRecordSeparatorPolicy(line);
            }
            catch (IOException e) {
                // Prevent IOException from recurring indefinitely
                // if client keeps catching and re-calling
                noInput = true;
                throw new NonTransientFlatFileException("Unable to read from resource: [" + resource + "]", e, line,
                        lineCount);
            }
            return line;
        }

        private boolean isComment(String line) {
            for (String prefix : comments) {
                if (line.startsWith(prefix)) {
                    return true;
                }
            }
            return false;
        }

        @Override
        protected void doClose() throws Exception {
            lineCount = 0;
            if (reader != null) {
                reader.close();
            }
        }

        @Override
        protected void doOpen() throws Exception {
            Assert.notNull(resource, "Input resource must be set");
            Assert.notNull(recordSeparatorPolicy, "RecordSeparatorPolicy must be set");

            noInput = true;
            if (!resource.exists()) {
                if (strict) {
                    throw new IllegalStateException("Input resource must exist (reader is in 'strict' mode): " + resource);
                }
                logger.warn("Input resource does not exist " + resource.getDescription());
                return;
            }

            if (!resource.isReadable()) {
                if (strict) {
                    throw new IllegalStateException("Input resource must be readable (reader is in 'strict' mode): "
                            + resource);
                }
                logger.warn("Input resource is not readable " + resource.getDescription());
                return;
            }

            reader = bufferedReaderFactory.create(resource, encoding);
            for (int i = 0; i < linesToSkip; i++) {
                String line = readLine();
                if (skippedLinesCallback != null) {
                    skippedLinesCallback.handleLine(line);
                }
            }
            noInput = false;
        }

        @Override
        public void afterPropertiesSet() throws Exception {
            Assert.notNull(lineMapper, "LineMapper is required");
        }

        @Override
        protected void jumpToItem(int itemIndex) throws Exception {
            for (int i = 0; i < itemIndex; i++) {
                readLine();
            }
        }

        private String applyRecordSeparatorPolicy(String line) throws IOException {

            String record = line;
            while (line != null && !recordSeparatorPolicy.isEndOfRecord(record)) {
                line = this.reader.readLine();
                if (line == null) {
                    if (StringUtils.hasText(record)) {
                        // A record was partially complete since it hasn't ended but
                        // the line is null
                        throw new FlatFileParseException("Unexpected end of file before record complete", record, lineCount);
                    }
                    else {
                        // Record has no text but it might still be post processed
                        // to something (skipping preProcess since that was already
                        // done)
                        break;
                    }
                }
                else {
                    lineCount++;
                }
                record = recordSeparatorPolicy.preProcess(record) + line;
            }

            return recordSeparatorPolicy.postProcess(record);

        }

    }
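
CSVFieldMappingException, the exception type that is skipped in the step configuration, is a small custom exception along these lines (a sketch; it is shown as an unchecked exception so the field set mapper can throw it, and the real class may carry more context):

    package com.fastretailing.catalogPlatformSCMProducer.exception;

    /**
     * Thrown when a CSV line cannot be mapped to a ProducerMessage.
     * Declared as skippable in the csvFileStep configuration.
     */
    public class CSVFieldMappingException extends RuntimeException {

        public CSVFieldMappingException(String message) {
            super(message);
        }

        public CSVFieldMappingException(String message, Throwable cause) {
            super(message, cause);
        }
    }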

I am configuring the JdbcTemplate beans like this in my job configuration class:

    @Primary
    @Bean(name = "mysqlDs")
    @ConfigurationProperties(prefix = "datasource.sql.jobMetaDataDb")
    public DataSource sqlDataSource() {
        return DataSourceBuilder.create().build();
    }

    @Bean(name = "mysql")
    @Autowired
    public JdbcTemplate slaveJdbcTemplate(@Qualifier("mysqlDs") DataSource mysqlDs) {
        return new JdbcTemplate(mysqlDs);
    }

    @Bean(name = "rdsDataSource")
    @ConfigurationProperties(prefix = "datasource.sql.rdsWriterDb")
    public DataSource rdsDataSource() {
        return DataSourceBuilder.create().build();
    }

    @Bean(name = "rdsJdbcTemplate")
    @Autowired
    public JdbcTemplate rdsJdbcTemplate(@Qualifier("rdsDataSource") DataSource rdsDataSource) {
        return new JdbcTemplate(rdsDataSource);
    }
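
The matching entries in application.properties look roughly like this (all values below are placeholders):

    # chunk size used by csvFileStep
    reader.chunkSize=1000

    # Spring Batch job repository (meta data) database
    datasource.sql.jobMetaDataDb.url=jdbc:mysql://localhost:3306/batch_meta
    datasource.sql.jobMetaDataDb.username=batch_user
    datasource.sql.jobMetaDataDb.password=secret
    datasource.sql.jobMetaDataDb.driver-class-name=com.mysql.jdbc.Driver

    # RDS database written by RdsWriter
    datasource.sql.rdsWriterDb.url=jdbc:mysql://my-rds-host:3306/catalog
    datasource.sql.rdsWriterDb.username=rds_user
    datasource.sql.rdsWriterDb.password=secret
    datasource.sql.rdsWriterDb.driver-class-name=com.mysql.jdbc.Driver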

0 Answers

No answers yet.