我正在开发一个使用Apache POI读取excel xlsb文件的Java应用程序,但是在读取它时遇到异常,我的代码如下:
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.Package;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.util.Iterator;
/**
 * Streams every sheet of a workbook through a SAX handler and prints each cell
 * reference followed by the cell value.
 *
 * <p>{@link XSSFReader} hands the workbook parts to an XML parser.
 * NOTE(review): a binary {@code .xlsb} input is reported to fail inside
 * {@code getSheetsData()} with a {@code CharConversionException}; this class does
 * not add {@code .xlsb} support.
 */
public class Prueba {

    /**
     * Prints all sheets of the hard-coded workbook; any I/O, package or SAX
     * failure is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String direccion = "C:/Documents and Settings/RSalasL/My Documents/New Folder/masstigeoct12.xlsb";
        try {
            printAllSheets(direccion);
        } catch (IOException | OpenXML4JException | SAXException e) {
            // InvalidFormatException is an OpenXML4JException, so it is covered here too.
            e.printStackTrace();
        }
    }

    /**
     * Prints the cells of every sheet in the given workbook.
     *
     * @param filename path of the workbook to read
     * @throws Exception if the package cannot be opened or a sheet cannot be parsed
     */
    public void processAllSheets(String filename) throws Exception {
        printAllSheets(filename);
    }

    /**
     * Shared implementation behind {@link #main} and {@link #processAllSheets}.
     * The package and each sheet stream are released even when parsing fails.
     */
    private static void printAllSheets(String filename)
            throws IOException, OpenXML4JException, SAXException {
        Package pkg = Package.open(filename);
        try {
            XSSFReader reader = new XSSFReader(pkg);
            SharedStringsTable sst = reader.getSharedStringsTable();
            XMLReader parser = fetchSheetParser(sst);
            Iterator<InputStream> sheets = reader.getSheetsData();
            while (sheets.hasNext()) {
                System.out.println("Processing new sheet:\n");
                try (InputStream sheet = sheets.next()) {
                    parser.parse(new InputSource(sheet));
                }
                System.out.println("");
            }
        } finally {
            // revert() closes the package without writing anything back; the
            // workbook is only read here, so nothing must be saved.
            pkg.revert();
        }
    }

    /**
     * Creates a SAX reader whose content handler prints cell values, resolving
     * shared-string indexes through {@code sst}.
     *
     * @param sst shared strings table of the workbook being parsed
     * @return a reader ready to parse a sheet part
     * @throws SAXException if the parser class cannot be instantiated
     */
    public static XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        // NOTE(review): the parser class is named explicitly, so Xerces has to be
        // on the classpath - confirm this is intended.
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /** SAX handler that prints "&lt;cell reference&gt; - &lt;value&gt;" for each cell that has a value. */
    private static class SheetHandler extends DefaultHandler {
        private final SharedStringsTable sst;
        /** Text collected since the most recent start tag. */
        private final StringBuilder lastContents = new StringBuilder();
        /** True while the current cell's value is an index into the shared strings table. */
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException {
            // c => cell
            if ("c".equals(name)) {
                // Print the cell reference
                System.out.print(attributes.getValue("r") + " - ");
                // t="s" marks the value as an index into the shared strings table
                nextIsString = "s".equals(attributes.getValue("t"));
            }
            // Clear contents cache
            lastContents.setLength(0);
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            // v => contents of a cell. Only a closing </v> carries a value; resolving
            // the shared-string index on any other closing tag would call parseInt
            // on an empty buffer for a string-typed cell that has no <v> child.
            if (!"v".equals(name)) {
                return;
            }
            // Processed here rather than in characters(), which may be called more than once.
            String value = lastContents.toString();
            if (nextIsString) {
                int idx = Integer.parseInt(value);
                value = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                nextIsString = false;
            }
            System.out.println(value);
        }

        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            lastContents.append(ch, start, length);
        }
    }
}
抛出的异常如下:
java.io.CharConversionException: Characters larger than 4 bytes are not supported: byte 0x83 implies a length of more than 4 bytes
at org.apache.xmlbeans.impl.piccolo.xml.UTF8XMLDecoder.decode(UTF8XMLDecoder.java:162)
at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader$FastStreamDecoder.read(XMLStreamReader.java:762)
at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader.read(XMLStreamReader.java:162)
at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yy_refill(PiccoloLexer.java:3474)
at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:3958)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439)
at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270)
at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257)
at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
at org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument$Factory.parse(Unknown Source)
at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:207)
at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:166)
at org.apache.poi.xssf.eventusermodel.XSSFReader.getSheetsData(XSSFReader.java:160)
at EDManager.Prueba.main(Prueba.java:36)
该文件有2张工作表,一张包含329行和3列,另一张包含566行和3列,我只想读取该文件,以查找某个值是否存在于第二张工作表中。
答案 0 :(得分:10)
对于.xlsb文件格式,Apache POI除文本提取之外不提供任何其他支持。Apache POI可以为.xls文件(通过HSSF)和.xlsx文件(通过XSSF)提供完整的读写支持,也可以同时支持两者(通过通用的SS UserModel接口)。
但是,通用操作并不支持.xlsb格式——它是上述两种格式之间一种非常奇怪的混合体,所需的工作量很大,因此至今没有人愿意自愿承担或赞助这项工作。
自Apache POI 3.15 beta3 / 3.16起,Apache POI为.xlsb文件提供了一个文本提取器——XSSFBEventBasedExcelExtractor。您可以使用它从文件中提取文本,或者稍作调整后将其转换为CSV之类的格式。
如果需要完整的读/写支持,您需要将文件转换为.xls(如果行/列数不是特别多)或.xlsx(如果行/列数很多)。如果您非常愿意提供帮助,可以阅读XSSFBEventBasedExcelExtractor的源代码,然后尝试贡献补丁,为POI添加对该格式的完整支持!
(另外,从该异常来看,我认为您的这个.xlsb文件有部分损坏;但即使文件没有损坏,Apache POI对它也只支持文本提取,不支持其他任何操作,抱歉)
答案 1 :(得分:0)
我有一个使用smartxls的实现:我的代码先将xlsb转换为xlsx,之后就可以使用Apache POI了。下面的方法接收一个java.io.File,检查其扩展名是否为xlsb,如果是则将其转换为xlsx,并用新文件替换原文件。这对我有用。
/**
 * Converts an {@code .xlsb} workbook to {@code .xlsx} using SmartXLS.
 *
 * <p>Files whose extension does not match {@code Static.XLSB_EXT} (ignoring case)
 * are left alone. The converted workbook is written to the path produced by the
 * {@code replaceAll} call below. Failures are logged and reported through
 * {@code MensajesJSFUtil}; nothing is rethrown.
 *
 * <p>Reassigning {@code file} inside this method would not be visible to the
 * caller, so no such assignment is made; callers that need the converted file
 * have to derive its path themselves.
 *
 * @param file the candidate workbook
 */
private void processXLSBFile(File file) {
    WorkBook workBook = new WorkBook();
    String filePath = file.getAbsolutePath();
    if (FilenameUtils.getExtension(filePath).equalsIgnoreCase((Static.XLSB_EXT))) {
        try {
            // Both streams are closed even when reading or writing fails.
            try (java.io.FileInputStream in = new java.io.FileInputStream(filePath)) {
                workBook.readXLSB(in);
            }
            // NOTE(review): the pattern is built from Static.XLSB while the check above
            // uses Static.XLSB_EXT, and replaceAll rewrites every match in the path,
            // not only the trailing extension - confirm both constants and intent.
            String xlsxPath = filePath.replaceAll("(?i)".concat(Static.XLSB),
                    Static.XLSX_EXT.toLowerCase());
            try (java.io.FileOutputStream out = new java.io.FileOutputStream(xlsxPath)) {
                workBook.writeXLSX(out);
            }
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            MensajesJSFUtil
                    .mostrarMensajeNegocio(new GTMException(e, ClaveMensaje.COMANDAS_ADJUNTAR_XLSBFILE_READERROR));
        }
    }
}
答案 2 :(得分:0)
我尝试使用Apache POI读取XLSB文件,并且成功。下面是我使用的代码段。
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads every sheet of a binary {@code .xlsb} workbook with POI's
 * {@link XSSFBReader} and prints the text that {@code TestSheetHandler}
 * collects for each sheet.
 */
public class ApachePoiXLSB {

    /**
     * Parses {@code test.xlsb} and prints one collected string per sheet.
     * I/O, package and SAX failures are reported via their stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String xlsbFileName = "test.xlsb";
        try {
            OPCPackage pkg = OPCPackage.open(xlsbFileName);
            try {
                XSSFBReader r = new XSSFBReader(pkg);
                XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
                XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
                XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();
                List<String> sheetTexts = new ArrayList<>();
                while (it.hasNext()) {
                    // Close each sheet stream as soon as it has been parsed.
                    try (InputStream is = it.next()) {
                        String name = it.getSheetName();
                        TestSheetHandler testSheetHandler = new TestSheetHandler();
                        testSheetHandler.startSheet(name);
                        XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
                                xssfbStylesTable,
                                it.getXSSFBSheetComments(),
                                sst, testSheetHandler,
                                new DataFormatter(),
                                false);
                        sheetHandler.parse();
                        testSheetHandler.endSheet();
                        sheetTexts.add(testSheetHandler.toString());
                    }
                }
                System.out.println("output text:" + sheetTexts);
            } finally {
                // revert() closes the package without saving; the workbook is only read here.
                pkg.revert();
            }
        } catch (IOException | OpenXML4JException | SAXException e) {
            // InvalidFormatException is an OpenXML4JException, so it is covered here too.
            e.printStackTrace();
        }
    }
}
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;

/**
 * Collects the contents of one sheet as a markup-like string: a {@code <sheet>}
 * element containing {@code <tr>} rows, {@code <td>} cells and optional
 * {@code <header>}/{@code <footer>} elements. The accumulated text is returned
 * by {@link #toString()}.
 *
 * <p>Values are appended verbatim; no escaping of markup characters is performed.
 */
class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
    private final StringBuilder sb = new StringBuilder();

    /**
     * Opens the sheet element.
     *
     * @param sheetName value written into the {@code name} attribute
     */
    public void startSheet(String sheetName) {
        // The attribute value is now closed with a quote before the tag ends.
        sb.append("<sheet name=\"").append(sheetName).append("\">");
    }

    /** Closes the sheet element. */
    public void endSheet() {
        sb.append("</sheet>");
    }

    @Override
    public void startRow(int rowNum) {
        sb.append("\n<tr num=\"").append(rowNum).append("\">");
    }

    @Override
    public void endRow(int rowNum) {
        // A closing tag carries no attributes; the row number is already on the opening tag.
        sb.append("\n</tr>");
    }

    @Override
    public void cell(String cellReference, String formattedValue, XSSFComment comment) {
        String value = (formattedValue == null) ? "" : formattedValue;
        sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(value);
        if (comment != null) {
            sb.append("<span type=\"comment\" author=\"")
                    .append(comment.getAuthor()).append("\">")
                    .append(comment.getString().toString().trim()).append("</span>");
        }
        sb.append("</td>");
    }

    @Override
    public void headerFooter(String text, boolean isHeader, String tagName) {
        String element = isHeader ? "header" : "footer";
        sb.append('<').append(element).append(" tagName=\"").append(tagName).append("\">")
                .append(text)
                .append("</").append(element).append('>');
    }

    @Override
    public String toString() {
        return sb.toString();
    }
}