I am working on a requirement where I need to read a large xlsx file and convert it to a csv file. I only need to read up to a specific (predefined) number of cells in each row. The input xlsx sheet contains 1 million records and is up to 150 MB in size. I have used the POI API. It works fine with small xlsx files, but for large files it throws a Java heap space error. Please look at the code below and suggest a solution. I am not very familiar with the Event/SAXParser API, and I am assuming that it would be suitable for reading large xlsx files.
package test;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Converts the first sheet of an .xlsx workbook into a pipe-delimited text file,
 * writing a fixed number of leading columns per row.
 *
 * <p>NOTE(review): {@link XSSFWorkbook} materialises the whole workbook in memory before the
 * first row can be read, which is the likely source of "Java heap space" errors on very large
 * inputs. This class still uses that API; a low-memory conversion would need POI's event API
 * (XSSFReader with a SAX handler) — confirm against the POI documentation before relying on
 * this class for million-row files.
 */
public class XLSXToCSVConverterNew {

    /** Delimiter written between the cells of one output line. */
    private static final String SEPARATOR = "|";

    /**
     * Writes rows of the first sheet of {@code inputFile} to {@code outputFile}, one line per row,
     * cells separated by {@code |} and lines terminated by {@code \n}. Output is encoded as UTF-8.
     *
     * <p>Only rows that have a non-blank cell somewhere in the first {@code expectedColumns}
     * columns are written. Missing cells are written as empty fields. Any exception is caught
     * and its stack trace printed; nothing is rethrown to the caller.
     *
     * @param inputFile       the .xlsx workbook to read
     * @param outputFile      the text file to create (an existing file is overwritten)
     * @param expectedColumns number of leading columns to export from each row
     */
    public static void xlsx(File inputFile, File outputFile, int expectedColumns) {
        // Opening the FileOutputStream truncates an existing file, so no explicit delete is needed.
        // Both streams are closed by try-with-resources even when the conversion fails midway.
        try (Writer out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8));
             InputStream in = new FileInputStream(inputFile)) {
            XSSFWorkbook wBook = new XSSFWorkbook(in);
            XSSFSheet sheet = wBook.getSheetAt(0);
            System.out.println("Number of rows in sheet are :" + sheet.getLastRowNum());

            StringBuilder line = new StringBuilder();
            Iterator<Row> rowIterator = sheet.iterator();
            while (rowIterator.hasNext()) {
                Row row = rowIterator.next();
                // The upper bound of containsValue is exclusive, so pass expectedColumns itself
                // to make sure the last exported column is inspected as well.
                if (!containsValue(row, 0, expectedColumns)) {
                    continue;
                }
                line.setLength(0);
                for (int i = 0; i < expectedColumns; i++) {
                    if (i > 0) {
                        line.append(SEPARATOR);
                    }
                    line.append(cellText(row.getCell(i)));
                }
                if (line.length() != 0) {
                    line.append('\n');
                    out.write(line.toString());
                }
            }
        } catch (Exception e) {
            // Best-effort conversion: report the failure without propagating it, as before.
            e.printStackTrace();
        }
    }

    /**
     * Returns the text to export for one cell.
     *
     * @param cell the cell to render, or {@code null} when the row has no cell at that index
     * @return the cell's text; an empty string for a missing or blank cell
     */
    private static String cellText(Cell cell) {
        if (cell == null) {
            // getCell yields null for cells that were never written; export them as empty fields.
            return "";
        }
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_BOOLEAN:
            case Cell.CELL_TYPE_NUMERIC:
                // Convert the in-memory cell to a string cell first, then read its string value.
                cell.setCellType(Cell.CELL_TYPE_STRING);
                return cell.getStringCellValue();
            case Cell.CELL_TYPE_STRING:
                return cell.getStringCellValue();
            case Cell.CELL_TYPE_BLANK:
                return "";
            default:
                // Remaining cell types fall back to the cell's toString() form.
                return String.valueOf(cell);
        }
    }

    /**
     * Tells whether the row has at least one non-blank cell in the index range
     * {@code [fcell, lcell)}.
     *
     * @param row   the row to inspect
     * @param fcell index of the first cell to check (inclusive)
     * @param lcell index at which checking stops (exclusive)
     * @return {@code true} if some cell in the range is present and its string form is not blank
     */
    public static boolean containsValue(Row row, int fcell, int lcell) {
        for (int i = fcell; i < lcell; i++) {
            Cell cell = row.getCell(i);
            if (cell != null && !StringUtils.isBlank(String.valueOf(cell))) {
                return true;
            }
        }
        return false;
    }

    /** Converts a sample workbook using the first 30 columns of each row. */
    public static void main(String[] args) {
        File inputFile = new File("/home/mypc/Desktop/poi/test.xlsx");
        File outputFile = new File("/home/mypc/Desktop/poi/test.csv");
        xlsx(inputFile, outputFile, 30);
    }
}