我有一个 .doc 文件，想使用 Apache POI 查找其中的上标和下标文本。
答案 0 :(得分:1)
以下示例演示了如何从 .docx 文件中读取上标/下标文本。对于 .doc 文件，做法类似，只是需要改用 POI 的 HWPF API（而不是 XWPF）。
package demo.poi;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
/**
 * Demonstrates how to detect superscript and subscript text in a .docx file
 * using the Apache POI XWPF API.
 */
public class DocReaderTest {

    /**
     * Reads {@code C:/temp/sample.docx}, walks every run of every paragraph
     * returned by {@link XWPFDocument#getParagraphs()} and prints the run's
     * text together with its vertical alignment (plain, subscript or
     * superscript).
     *
     * @throws IOException if the file is missing or cannot be read
     * @throws InvalidFormatException kept in the signature for compatibility
     *         with existing callers; the stream-based constructor used here
     *         reports problems as {@link IOException}
     */
    @Test
    public void showReadDocWithSubscriptAndSuperScript() throws IOException, InvalidFormatException {
        File docFile = new File("C:/temp/sample.docx");

        // Read through a stream rather than OPCPackage.openOrCreate(): that
        // call creates a package when the file does not exist instead of
        // reporting the missing file, and the package was never closed.
        // try-with-resources releases both the stream and the document.
        try (FileInputStream in = new FileInputStream(docFile);
             XWPFDocument hdoc = new XWPFDocument(in)) {
            for (XWPFParagraph paragraph : hdoc.getParagraphs()) {
                for (XWPFRun xwrun : paragraph.getRuns()) {
                    String smalltext = xwrun.getText(0);
                    if (smalltext == null) {
                        // Runs without text content (e.g. pictures) have nothing to report.
                        continue;
                    }
                    VerticalAlign subscript = xwrun.getSubscript();
                    switch (subscript) {
                        case BASELINE:
                            System.out.println("smalltext, plain = " + smalltext);
                            break;
                        case SUBSCRIPT:
                            System.out.println("smalltext, subscript = " + smalltext);
                            break;
                        case SUPERSCRIPT:
                            System.out.println("smalltext, superscript = " + smalltext);
                            break;
                    }
                }
            }
        }
    }
}