如何使用apache poi基于行数将excel文件拆分为多个文件?

时间:2016-05-21 18:26:01

标签: java excel apache-poi

我是apache poi的新手,我想根据行数将excel文件拆分成多个文件。

例如,data.xlsx有15k行,新文件应该像data_1.xlsx一样有5k行,data_2.xlsx应该是5-10k,data_3.xlsx应该是10-15k。

3 个答案:

答案 0 :(得分:7)

我找到了你。

package com.industries.seanimus;  

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;  

public class ReportSplitter {

    private final String fileName;
    private final int maxRows;

    public ReportSplitter(String fileName, final int maxRows) {

        ZipSecureFile.setMinInflateRatio(0);

        this.fileName = fileName;
        this.maxRows = maxRows;

        try {
            /* Read in the original Excel file. */
            OPCPackage pkg = OPCPackage.open(new File(fileName));
            XSSFWorkbook workbook = new XSSFWorkbook(pkg);
            XSSFSheet sheet = workbook.getSheetAt(0);

            /* Only split if there are more rows than the desired amount. */
            if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                writeWorkBooks(wbs);
            }
            pkg.close();
        }
        catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {

        List<SXSSFWorkbook> workbooks = new ArrayList<SXSSFWorkbook>();

        SXSSFWorkbook wb = new SXSSFWorkbook();
        SXSSFSheet sh = wb.createSheet();

        SXSSFRow newRow;
        SXSSFCell newCell;

        int rowCount = 0;
        int colCount = 0;

        XSSFSheet sheet = workbook.getSheetAt(0);

        for (Row row : sheet) {
            newRow = sh.createRow(rowCount++);

            /* Time to create a new workbook? */
            if (rowCount == maxRows) {
                workbooks.add(wb);
                wb = new SXSSFWorkbook();
                sh = wb.createSheet();
                rowCount = 0;
            }

            for (Cell cell : row) {
                newCell = newRow.createCell(colCount++);
                newCell = setValue(newCell, cell);

                CellStyle newStyle = wb.createCellStyle();
                newStyle.cloneStyleFrom(cell.getCellStyle());
                newCell.setCellStyle(newStyle);
            }
            colCount = 0;
        }

        /* Only add the last workbook if it has content */
        if (wb.getSheetAt(0).getPhysicalNumberOfRows() > 0) {
            workbooks.add(wb);
        }
        return workbooks;
    }

    /*
     * Grabbing cell contents can be tricky. We first need to determine what
     * type of cell it is.
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING: 
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                newCell.setCellValue(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /* Write all the workbooks to disk. */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        FileOutputStream out;
        try {
            for (int i = 0; i < wbs.size(); i++) {
                String newFileName = fileName.substring(0, fileName.length() - 5);
                out = new FileOutputStream(new File(newFileName + "_" + (i + 1) + ".xlsx"));
                wbs.get(i).write(out);
                out.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args){
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }

}

一些注意事项:

  • 为了编写工作簿,我使用了 SXSSFWorkbook。这很重要 比HSSF或XSSF更快,因为它不会在内存中保存所有内容 在写作之前(导致可怕的gc混乱)。

  • Busy Developer's Guide是您学习Apache POI的朋友;)

<强>享受!

<小时/> 编辑:我已更新代码以复制单元格样式。有两点需要注意:

  • 复制样式会使事情大幅减少。
  • POI会创建一个template文件,该文件可能太大无法解压缩,导致检测到 Zip炸弹错误。您可以通过ZipSecureFile.setMinInflateRatio(0)更改最低通胀率来解决此问题。

答案 1 :(得分:0)

感谢您的代码。我身边只有两分钱 上面的代码不会复制时间,因此我将其修改为具有Time Columns,这是setValue代码中的一个小修改。 基本上我正在检查使用格式部分,如果它是时间列,年份将是1899 希望对您有所帮助:)

 private static SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING: 
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            //System.out.println("The Cell Type is numeric ");
            if (DateUtil.isCellDateFormatted(cell)) {
                System.out.println(cell.getDateCellValue());
                Date c = cell.getDateCellValue();
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("hh:mm:ss");
                SimpleDateFormat year = new SimpleDateFormat("yyyy");
                String strTime = simpleDateFormat.format(c);
                String strYear=year.format(c);
                if(strYear.equals("1899"))
                {
                    System.out.println(strTime);
                    newCell.setCellValue(DateUtil.convertTime(strTime));    
                }
                else
                {
                    newCell.setCellValue(c);
                }



            } else {
                newCell.setCellValue(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

答案 2 :(得分:-1)

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;  


public class SplitFile {

    private final String fileName;
    private final int maxRows;
    private final String path;
    private final String userfilename="";
    public static int filecount;
    public static String taskname;
    public static int rowcounter;

    public SplitFile(String fileName, final int maxRows, String filepath, String userfilename) throws FileNotFoundException     {
        path = filepath;
        taskname = userfilename;
        this.fileName = fileName;
        this.maxRows = maxRows;
        System.out.println("In Constructor");

        File file = new File(fileName);
        FileInputStream inputStream = new FileInputStream(file);
        try {
            /* Read in the original Excel file. */
            //OPCPackage pkg = OPCPackage.open(new File(fileName));
            Workbook workbook = new XSSFWorkbook(inputStream);
            XSSFSheet sheet = (XSSFSheet) workbook.getSheetAt(0);
            System.out.println("Got Sheet");
            /* Only split if there are more rows than the desired amount. */
            if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                writeWorkBooks(wbs);
            }

        }
        catch (EncryptedDocumentException | IOException e) {
            e.printStackTrace();
        }
    }

    private List<SXSSFWorkbook> splitWorkbook(Workbook workbook) {

        List<SXSSFWorkbook> workbooks = new ArrayList<SXSSFWorkbook>();

        SXSSFWorkbook wb = new SXSSFWorkbook();
        SXSSFSheet sh = (SXSSFSheet) wb.createSheet();

        SXSSFRow newRow,headRow = null;
        SXSSFCell newCell;
        String headCellarr[] = new String[50];

        int rowCount = 0;
        int colCount = 0;
        int headflag = 0;
        int rcountflag = 0;
        int cols = 0;

        XSSFSheet sheet = (XSSFSheet) workbook.getSheetAt(0);
        //sheet.createFreezePane(0, 1);
        int i = 0;
        rowcounter++;
        for (Row row : sheet) {
            if(i==0)
            {
            //headRow = sh.createRow(rowCount++);

            /* Time to create a new workbook? */
            int j = 0;
            for (Cell cell : row) {

                //newCell = headRow.createCell(colCount++);
                headCellarr[j] = cell.toString();
                j++;
            }
            cols = j;
            colCount = 0;
            i++;
            }
            else
            {
                break;
            }

        }

        for (Row row : sheet) {

            //newRow = sh.createRow(rowCount++);
            /* Time to create a new workbook? */
            if (rowCount == maxRows) {
                headflag = 1;
                workbooks.add(wb);
                wb = new SXSSFWorkbook();
                sh = (SXSSFSheet) wb.createSheet();
                rowCount = 0;

            }
                if(headflag == 1)
                {
                    newRow = (SXSSFRow) sh.createRow(rowCount++);
                    headflag = 0;
                    for(int k=0;k<cols;k++)
                    {
                        newCell = (SXSSFCell) newRow.createCell(colCount++);
                        newCell.setCellValue(headCellarr[k]);

                    }
                    colCount = 0;
                    newRow = (SXSSFRow) sh.createRow(rowCount++);

                     for (Cell cell : row) {
                        newCell = (SXSSFCell) newRow.createCell(colCount++);
                        if(cell.getCellType() == Cell.CELL_TYPE_BLANK)
                        {
                            newCell.setCellValue("-");
                        }
                        else
                        {
                        newCell = setValue(newCell, cell);
                        }
                    }
                    colCount = 0;


                }
                else
                {
                    rowcounter++;
                newRow = (SXSSFRow) sh.createRow(rowCount++);
                for(int cn=0; cn<row.getLastCellNum(); cn++) {
                   // If the cell is missing from the file, generate a blank one
                   // (Works by specifying a MissingCellPolicy)
                   Cell cell = row.getCell(cn, Row.CREATE_NULL_AS_BLANK);
                   // Print the cell for debugging
                   //System.out.println("CELL: " + cn + " --> " + cell.toString());
                   newCell = (SXSSFCell) newRow.createCell(cn);
                   if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC)
                   {
                      newCell.setCellValue(cell.getNumericCellValue());
                   }
                   else
                   {
                   newCell.setCellValue(cell.toString());
                   }
            }
                }
              }


        /* Only add the last workbook if it has content */
        if (wb.getSheetAt(0).getPhysicalNumberOfRows() > 0) {
            workbooks.add(wb);
        }
        return workbooks;
    }

    /*
     * Grabbing cell contents can be tricky. We first need to determine what
     * type of cell it is.
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING: 
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                //newCell.setCellValue(cell.getNumericCellValue());
                newCell.setCellValue(cell.toString());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        case Cell.CELL_TYPE_BLANK:
            newCell.setCellValue("-");
            break;
        default:
            System.out.println("Could not determine cell type");
            newCell.setCellValue(cell.toString());

        }
        return newCell;
    }

    /* Write all the workbooks to disk. */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        FileOutputStream out;
        boolean mdir = new File(path + "/split").mkdir();

        try {
            for (int i = 0; i < wbs.size(); i++) {
                String newFileName = fileName.substring(0, fileName.length() - 5);
                //out = new FileOutputStream(new File(newFileName + "_" + (i + 1) + ".xlsx"));
                out = new FileOutputStream(new File(path + "/split/" + taskname + "_" + (i + 1) + ".xlsx"));
                wbs.get(i).write(out);
                out.close();
                System.out.println("Written" + i);
                filecount++;
            }
            System.out.println(userfilename);
        } catch (IOException e) {
            e.printStackTrace();
        }

    }
    public int sendtotalrows()
    {
        return rowcounter;
    }

    public static void main(String[] args) throws FileNotFoundException{
        // This will create a new workbook every 1000 rows.
        // new Splitter(filename.xlsx, No of split rows, filepath, newfilename);
        new SplitFile("filepath/filename.xlsx", 10000, "filepath", "newfilename");  //No of rows to split: 10 K
    }
}