Memory issue when reading large excel using POI

时间:2016-03-02 10:55:09

标签: java excel apache-poi sax export-to-csv

I am working on a requirement where I need to read a large xlsx file and convert it to a CSV file. I need to read up to a specific (predefined) number of cells in each row. The input xlsx sheet contains 1 million records and is up to 150 MB in size. I have used the POI API. It works fine with small xlsx files, but for large files it throws a Java heap space error. Please look at the code below and suggest a solution. I am not very familiar with the Event/SAX parser API, but I assume it would be suitable for reading large xlsx files.

package test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Iterator;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 * Converts the first sheet of an .xlsx workbook into a pipe-delimited text file.
 *
 * <p>NOTE(review): the workbook is still opened through the usermodel
 * ({@code XSSFWorkbook}), which is expected to hold the entire sheet in memory.
 * For very large inputs this is the likely source of heap exhaustion; moving to
 * POI's event (SAX) reader would be the structural fix — confirm against the
 * POI version in use.
 */
public class XLSXToCSVConverterNew {

    /** Separator written between the columns of each output line. */
    private static final String SEPARATOR = "|";

    /**
     * Reads the first sheet of {@code inputFile} and writes one line per
     * non-empty row to {@code outputFile}, using the first
     * {@code expectedColumns} cells of each row separated by {@code |}.
     *
     * <p>A row is skipped when none of its first {@code expectedColumns} cells
     * holds a non-blank value. Cells that are missing from a row are written as
     * empty fields. Any exception is printed to standard error and ends the
     * conversion; it is not propagated to the caller.
     *
     * @param inputFile       the .xlsx file to read
     * @param outputFile      the file to (re)create with the converted rows
     * @param expectedColumns number of leading columns to export from each row
     */
    public static void xlsx(File inputFile, File outputFile, int expectedColumns) {
        try {
            if (outputFile.exists()) {
                outputFile.delete();
            }
            // Both streams are closed on every exit path, including failures.
            try (FileOutputStream fos = new FileOutputStream(outputFile);
                 FileInputStream in = new FileInputStream(inputFile)) {
                XSSFWorkbook wBook = new XSSFWorkbook(in);
                XSSFSheet sheet = wBook.getSheetAt(0);
                System.out.println("Number of rows in sheet are :" + sheet.getLastRowNum());

                // Reused for every row so only one line is buffered at a time.
                StringBuilder line = new StringBuilder();
                int lastColumn = expectedColumns - 1;
                for (Row row : sheet) {
                    // Skip rows with no content in any of the expected columns.
                    if (!containsValue(row, 0, lastColumn)) {
                        continue;
                    }
                    line.setLength(0);
                    for (int i = 0; i <= lastColumn; i++) {
                        line.append(cellText(row.getCell(i)));
                        if (i != lastColumn) {
                            line.append(SEPARATOR);
                        }
                    }
                    if (line.length() != 0) {
                        line.append('\n');
                        fos.write(line.toString().getBytes());
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the text to export for a single cell.
     *
     * <p>Boolean and numeric cells are converted to string cells first and
     * their string value is returned; blank cells and cells that do not exist
     * in the row ({@code null}) yield an empty string; any other cell type
     * falls back to the cell's {@code toString()}.
     *
     * @param cell the cell to render, or {@code null} if the row has no such cell
     * @return the text for the output field, never {@code null} for a null cell
     */
    private static String cellText(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_BOOLEAN:
            case Cell.CELL_TYPE_NUMERIC:
                cell.setCellType(Cell.CELL_TYPE_STRING);
                return cell.getStringCellValue();
            case Cell.CELL_TYPE_STRING:
                return cell.getStringCellValue();
            case Cell.CELL_TYPE_BLANK:
                return "";
            default:
                return String.valueOf(cell);
        }
    }

    /**
     * Tells whether at least one cell in the given column range has content.
     *
     * <p>A cell counts as empty when it is missing ({@code null}) or when its
     * string form is empty or whitespace-only.
     *
     * @param row   the row to inspect
     * @param fcell index of the first column to check (inclusive)
     * @param lcell index of the last column to check (inclusive)
     * @return {@code true} if any cell in {@code [fcell, lcell]} is non-blank
     */
    public static boolean containsValue(Row row, int fcell, int lcell) {
        // Upper bound is inclusive: callers pass the index of the last column.
        for (int i = fcell; i <= lcell; i++) {
            Cell cell = row.getCell(i);
            if (cell != null && !StringUtils.isBlank(String.valueOf(cell))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs the conversion on a fixed sample input, exporting the first 30
     * columns of each row.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File inputFile = new File("/home/mypc/Desktop/poi/test.xlsx");
        File outputFile = new File("/home/mypc/Desktop/poi/test.csv");
        xlsx(inputFile, outputFile, 30);
    }

}

0 个答案:

没有答案