使用Apache POI向一个非常大的Excel文件中添加内容(已经没有其他办法了……)

时间:2016-03-04 20:56:25

标签: java excel apache-poi

我有一个很大的xlsx文件,其中有一个空的“数据源表”,以及其他若干工作表,这些工作表包含许多引用该数据源表的公式。我的应用程序需要生成数据、打开该文件、用这些数据填充那个空表,然后保存。我正在尝试用Apache POI完成这一切。问题是打开文件所消耗的内存和时间都令人无法接受。我读过其他相关主题,但没有找到解决方案。下面是我打开文件的方式:

// Open the .xlsx file as an OPC package.
pkg = OPCPackage.open(filename);
// Build an XSSFWorkbook from the package; per the question, opening the file
// this way is what takes an unacceptable amount of memory and time.
wb = new XSSFWorkbook(pkg);

请注意,使用SXSSFWorkbook行不通,因为它的构造函数需要传入一个XSSFWorkbook,而我一开始就无法创建它。我只需要填充文件中的一个空表,并不需要把整个文件加载到内存中。有什么想法吗?

谢谢!

1 个答案:

答案 0 :(得分:0)

您可以尝试只使用OPCPackage而不创建Workbook。但这样一来,我们就必须直接操作更底层的org.openxmlformats.schemas.spreadsheetml.x2006.main对象。这意味着在把字符串值存储为数据(需要用到SharedStringsTable)以及计算公式时,我们得不到XSSF对象的支持。

该示例使用一个至少包含4个工作表的Excel工作簿。第三个工作表就是您的“数据源表”,它必须已经存在,并且会被新数据覆盖。第四个工作表是其公式引用了“数据源表”的工作表。由于我们无法使用公式求值器(Evaluator),因此必须将FullCalcOnLoad设置为true。否则,就必须在Excel中按[Ctrl] + [Alt] + [Shift] + [F9]来强制完全重新计算。

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import java.util.regex.Pattern;

import javax.xml.namespace.QName;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;

import org.apache.poi.xssf.model.SharedStringsTable;

import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions;

import org.openxmlformats.schemas.officeDocument.x2006.relationships.STRelationshipId;

import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheetData;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument;

/**
 * Replaces the content of one worksheet inside an existing .xlsx file by working
 * directly on the OPC package parts, without ever creating a Workbook.
 *
 * <p>The third sheet part ({@code sheet3.xml}) is overwritten with generated data and
 * the fourth sheet part ({@code sheet4.xml}) is flagged for a full recalculation on
 * load, because no formula evaluator is used here.
 */
class ReadAndWriteTest5 {

 /** Workbook file that is modified in place. */
 private static final String WORKBOOK_FILE = "ReGesamt11_3Test.xlsx";

 /** Part holding the shared strings referenced by string cells. */
 private static final String SHARED_STRINGS_PART = "/xl/sharedStrings.xml";

 /** Part of the "data source sheet"; it must exist and is replaced. */
 private static final String DATA_SHEET_PART = "/xl/worksheets/sheet3.xml";

 /** Part of the sheet whose formulas reference the "data source sheet". */
 private static final String FORMULA_SHEET_PART = "/xl/worksheets/sheet4.xml";

 /** Number of sample rows written to the "data source sheet". */
 private static final int ROW_COUNT = 10;

 /**
  * Opens the package, rewrites the affected parts and saves the package.
  *
  * <p>If any step fails, the package is reverted instead of closed, so that the file
  * handle is released and no half-finished change is written back to the file.
  *
  * @param args ignored
  */
 public static void main(String[] args) {
  try {

   //we only open the OPCPackage, we don't create a Workbook
   OPCPackage opcpackage = OPCPackage.open(new File(WORKBOOK_FILE));

   boolean updated = false;
   try {
    updatePackage(opcpackage);
    updated = true;
   } finally {
    if (!updated) {
     //close WITHOUT saving: discard everything written so far
     opcpackage.revert();
    }
   }

   //closing a package saves the changed parts back to the file
   opcpackage.close();

  } catch (InvalidFormatException ifex) {
     ifex.printStackTrace();
  } catch (FileNotFoundException fnfex) {
     fnfex.printStackTrace();
  } catch (IOException ioex) {
     ioex.printStackTrace();
  } catch (XmlException xmlex) {
     xmlex.printStackTrace();
  }
 }

 /**
  * Performs all part modifications: new data sheet, updated shared strings and the
  * recalculation flag on the formula sheet.
  *
  * @param opcpackage the opened package
  * @throws IOException if a part is missing or cannot be read or written
  * @throws XmlException if the formula sheet part cannot be parsed
  */
 private static void updatePackage(OPCPackage opcpackage) throws IOException, XmlException {

  //if there are strings in the SheetData, we need the SharedStringsTable
  PackagePart sharedstringstablepart = requirePart(opcpackage, SHARED_STRINGS_PART);
  SharedStringsTable sharedstringstable = readSharedStrings(sharedstringstablepart);

  CTWorksheet datasheet = buildDataSheet(sharedstringstable);

  //write the SharedStringsTable, which now also contains the new strings
  OutputStream out = sharedstringstablepart.getOutputStream();
  try {
   sharedstringstable.writeTo(out);
  } finally {
   out.close();
  }

  XmlOptions xmlOptions = createWorksheetSaveOptions();

  //save the new worksheet as the third sheet which is the "data source sheet"
  writeWorksheet(requirePart(opcpackage, DATA_SHEET_PART), datasheet, xmlOptions);

  //since we can't use Evaluator, we must force recalculation on load for the formula sheet
  forceFullCalcOnLoad(requirePart(opcpackage, FORMULA_SHEET_PART), xmlOptions);
 }

 /**
  * Looks up a part by its exact name.
  *
  * @param opcpackage the opened package
  * @param partName the part name, matched literally (not as a regular expression)
  * @return the first part with that name
  * @throws IOException if the package contains no such part
  */
 private static PackagePart requirePart(OPCPackage opcpackage, String partName) throws IOException {
  List<PackagePart> parts = opcpackage.getPartsByName(Pattern.compile(partName, Pattern.LITERAL));
  if (parts.isEmpty()) {
   throw new IOException("Package part not found: " + partName);
  }
  return parts.get(0);
 }

 /**
  * Reads the existing shared strings from the given part.
  *
  * @param part the shared strings part
  * @return the populated table
  * @throws IOException if the part cannot be read
  */
 private static SharedStringsTable readSharedStrings(PackagePart part) throws IOException {
  SharedStringsTable sharedstringstable = new SharedStringsTable();
  InputStream in = part.getInputStream();
  try {
   sharedstringstable.readFrom(in);
  } finally {
   in.close();
  }
  return sharedstringstable;
 }

 /**
  * Creates the new worksheet for the "data source sheet" and fills it with sample data.
  * Each row gets a shared-string cell ("DataRow i") followed by a numeric cell.
  *
  * @param sharedstringstable table that receives the new string entries
  * @return the new worksheet
  */
 private static CTWorksheet buildDataSheet(SharedStringsTable sharedstringstable) {
  WorksheetDocument worksheetdocument = WorksheetDocument.Factory.newInstance();
  CTWorksheet worksheet = worksheetdocument.addNewWorksheet();
  CTSheetData sheetdata = worksheet.addNewSheetData();

  for (int i = 0; i < ROW_COUNT; i++) {

   //first cell: a string, stored as an index into the SharedStringsTable
   CTCell ctcell = sheetdata.addNewRow().addNewC();
   CTRst ctstr = CTRst.Factory.newInstance();
   ctstr.setT("DataRow " + i);
   int sRef = sharedstringstable.addEntry(ctstr);
   ctcell.setT(STCellType.S);
   ctcell.setV(Integer.toString(sRef));

   //second cell: a number written as text "<integer part>.<fraction part>"
   ctcell = sheetdata.getRowArray(i).addNewC();
   ctcell.setV("" + (i * 100 + (i + 1) * 10 + (i + 2)) + "." + ((i + 3) * 10 + (i + 4)));

  }
  return worksheet;
 }

 /**
  * Creates the XmlOptions used for saving a worksheet part: a synthetic
  * {@code worksheet} root element in the default namespace, UTF-8 encoding and the
  * prefix "r" suggested for the relationships namespace.
  *
  * @return the save options
  */
 private static XmlOptions createWorksheetSaveOptions() {
  XmlOptions xmlOptions = new XmlOptions();
  xmlOptions.setSaveOuter();
  xmlOptions.setUseDefaultNamespace();
  xmlOptions.setSaveAggressiveNamespaces();
  xmlOptions.setCharacterEncoding("UTF-8");
  xmlOptions.setSaveSyntheticDocumentElement(new QName(CTWorksheet.type.getName().getNamespaceURI(), "worksheet"));
  Map<String, String> map = new HashMap<String, String>();
  map.put(STRelationshipId.type.getName().getNamespaceURI(), "r");
  xmlOptions.setSaveSuggestedPrefixes(map);
  return xmlOptions;
 }

 /**
  * Saves a worksheet into the given part, replacing the part's content.
  *
  * @param sheetpart the target part
  * @param worksheet the worksheet to save
  * @param xmlOptions the save options
  * @throws IOException if the part cannot be written
  */
 private static void writeWorksheet(PackagePart sheetpart, CTWorksheet worksheet, XmlOptions xmlOptions) throws IOException {
  OutputStream out = sheetpart.getOutputStream();
  try {
   worksheet.save(out, xmlOptions);
  } finally {
   out.close();
  }
 }

 /**
  * Parses the given sheet part, sets FullCalcOnLoad to true and saves it back.
  *
  * @param sheetpart the part of the sheet containing the formulas
  * @param xmlOptions the save options
  * @throws IOException if the part cannot be read or written
  * @throws XmlException if the part content cannot be parsed
  */
 private static void forceFullCalcOnLoad(PackagePart sheetpart, XmlOptions xmlOptions) throws IOException, XmlException {
  WorksheetDocument worksheetdocument;
  InputStream in = sheetpart.getInputStream();
  try {
   worksheetdocument = WorksheetDocument.Factory.parse(in);
  } finally {
   in.close();
  }
  CTWorksheet worksheet = worksheetdocument.getWorksheet();

  //setFullCalcOnLoad true
  if (worksheet.getSheetCalcPr() == null) {
   worksheet.addNewSheetCalcPr().setFullCalcOnLoad(true);
  } else {
   worksheet.getSheetCalcPr().setFullCalcOnLoad(true);
  }

  writeWorksheet(sheetpart, worksheet, xmlOptions);
 }
}