使用XSSF和事件(Event)API读取Java中超大Excel文件的多个工作表

时间:2014-12-08 20:50:05

标签: java excel apache-poi

我正在使用来自(作者:lchen)的代码,该代码通过 readRows() 按照我提供的行数从Excel文件中读取内容。

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.InputSource;


public class TestLargeFileRead {
    private int rowNum = 0;
    private OPCPackage opcPkg;
    private ReadOnlySharedStringsTable stringsTable;
    private XMLStreamReader xmlReader;


    public void XExcelFileReader(String excelPath) throws Exception {
        opcPkg = OPCPackage.open(excelPath, PackageAccess.READ);
        this.stringsTable = new ReadOnlySharedStringsTable(opcPkg);

        XSSFReader xssfReader = new XSSFReader(opcPkg);
        XMLInputFactory factory = XMLInputFactory.newInstance();
        InputStream inputStream = xssfReader.getSheetsData().next();
        xmlReader = factory.createXMLStreamReader(inputStream);


        while (xmlReader.hasNext()) {
            xmlReader.next();
            if (xmlReader.isStartElement()) {
                if (xmlReader.getLocalName().equals("sheetData"))
                    break;
            }
        }
    }


    public int rowNum() {
        return rowNum;
    }


    public List<String[]> readRows(int batchSize) throws XMLStreamException {
        String elementName = "row";
        List<String[]> dataRows = new ArrayList<String[]>();
        if (batchSize > 0) {
            while (xmlReader.hasNext()) {
                xmlReader.next();
                if (xmlReader.isStartElement()) {
                    if (xmlReader.getLocalName().equals(elementName)) {
                        rowNum++;
                        dataRows.add(getDataRow());
                        if (dataRows.size() == batchSize)
                            break;
                    }
                }
            }
        }
        return dataRows;
    }

    private String[] getDataRow() throws XMLStreamException {
        List<String> rowValues = new ArrayList<String>();
        while (xmlReader.hasNext()) {
            xmlReader.next();
            if (xmlReader.isStartElement()) {
                if (xmlReader.getLocalName().equals("c")) {
                    CellReference cellReference = new CellReference(
                            xmlReader.getAttributeValue(null, "r"));
                    // Fill in the possible blank cells!
                    while (rowValues.size() < cellReference.getCol()) {
                        rowValues.add("");
                    }
                    String cellType = xmlReader.getAttributeValue(null, "t");
                    rowValues.add(getCellValue(cellType));
                }
            } else if (xmlReader.isEndElement()
                    && xmlReader.getLocalName().equals("row")) {
                break;
            }
        }
        return rowValues.toArray(new String[rowValues.size()]);
    }

    private String getCellValue(String cellType) throws XMLStreamException {
        String value = ""; // by default
        while (xmlReader.hasNext()) {
            xmlReader.next();
            if (xmlReader.isStartElement()) {
                if (xmlReader.getLocalName().equals("v")) {
                    if (cellType != null && cellType.equals("s")) {
                        int idx = Integer.parseInt(xmlReader.getElementText());
                        return new XSSFRichTextString(
                                stringsTable.getEntryAt(idx)).toString();
                    } else {
                        return xmlReader.getElementText();
                    }
                }
            } else if (xmlReader.isEndElement()
                    && xmlReader.getLocalName().equals("c")) {
                break;
            }
        }
        return value;
    }

    @Override
    protected void finalize() throws Throwable {
        if (opcPkg != null)
            opcPkg.close();
        super.finalize();
    }
public static void main(String[] args)  {  
        try {  
            TestLargeFileRead howto = new TestLargeFileRead();  
            howto.XExcelFileReader("D:\\TEMP_CATALOG\\H1.xlsx");  
        } catch (Exception e) {  
            e.printStackTrace();  
        }  


    }
}

但它只读取了第一张SHEET的内容,并丢弃了其他后续的SHEETS。我的要求是阅读SHEET名称;并根据名称读取SHEET的内容。任何人都可以帮我定制上面的代码获取SHEET NAME及其内容吗?拜托?

1 个答案:

答案 0 :(得分:3)

您需要使用(并相应调整用法)的关键类是XSSFReader。如果查看它的Javadoc,您会看到它提供了一个遍历所有工作表InputStream的迭代器,以及获取根工作簿流的方法。

如果要访问所有工作表,则需要更改以下行:

    // Takes only the first element of the sheet iterator, so only one
    // sheet's stream is ever handed to the XML reader.
    InputStream inputStream = xssfReader.getSheetsData().next();
    xmlReader = factory.createXMLStreamReader(inputStream);

更像是:

// Walk every sheet stream instead of only the first one.
Iterator<InputStream> sheetsData = xssfReader.getSheetsData();
while (sheetsData.hasNext()) {
    InputStream inputStream = sheetsData.next();
    // A new XML reader is created for each sheet's stream.
    xmlReader = factory.createXMLStreamReader(inputStream);

    ....
}

如果您还想获取工作表名称,那么可以参照Apache POI的XLSX基于事件的文本提取器(event-based text extractor)中的做法:

// The iterator returned by getSheetsData() is cast to SheetIterator so that
// the sheet name can be queried alongside each stream.
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
while (iter.hasNext()) {
    InputStream inputStream = iter.next();
    // getSheetName() is called after next(), for the stream just obtained.
    String sheetName = iter.getSheetName();

    if (sheetName.equalsIgnoreCase("TheSheetIWant")) {
       xmlReader = factory.createXMLStreamReader(inputStream);

       ....
    } else {
       // Not the sheet we want: release its stream before moving on.
       inputStream.close();
    }
}

如果您想了解更多关于这些内容的信息,那么最易于阅读和遵循的最佳示例之一就是Apache POI附带的XSSFEventBasedExcelExtractor - 请阅读相关代码学习!