如何使用 Apache POI 读取 .doc 文件中的上标(Superscript)和下标(Subscript)?

时间:2013-09-30 04:16:49

标签: java ms-word apache-poi

我有一个.doc文件,我想使用Apache-poi查找上标和下标。

1 个答案:

答案 0 :(得分:1)

以下示例演示了如何从 .docx 文件中读取上标/下标(使用 XWPF API)。.doc 文件的处理思路类似,但需要改用 HWPF API(例如 HWPFDocument,以及 CharacterRun.getSubSuperScriptIndex())。

package demo.poi;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.junit.Test;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;

/**
 * Demonstrates how to detect superscript and subscript text runs in a
 * {@code .docx} document using the Apache POI XWPF API.
 */
public class DocReaderTest {

    /** Location of the sample document read by the demo. */
    private static final String SAMPLE_DOCX_PATH = "C:/temp/sample.docx";

    /**
     * Reads the sample document and prints the text of every run together with
     * its vertical alignment (plain, subscript or superscript).
     *
     * <p>The document is opened from a stream inside try-with-resources so that
     * both the stream and the document are always closed, and so that a missing
     * sample file is reported as an error rather than being created on disk.
     *
     * @throws IOException if the sample file is missing or cannot be read
     * @throws InvalidFormatException retained in the signature for backward
     *         compatibility with existing callers
     */
    @Test
    public void showReadDocWithSubscriptAndSuperScript() throws IOException, InvalidFormatException {
        File docFile = new File(SAMPLE_DOCX_PATH);

        try (FileInputStream in = new FileInputStream(docFile);
             XWPFDocument hdoc = new XWPFDocument(in)) {
            for (XWPFParagraph paragraph : hdoc.getParagraphs()) {
                for (XWPFRun xwrun : paragraph.getRuns()) {
                    VerticalAlign subscript = xwrun.getSubscript();
                    // NOTE(review): getText(0) presumably yields null for runs without
                    // text, in which case "null" is printed — confirm against POI docs.
                    String smalltext = xwrun.getText(0);
                    switch (subscript) {
                        case BASELINE:
                            System.out.println("smalltext, plain = " + smalltext);
                            break;
                        case SUBSCRIPT:
                            System.out.println("smalltext, subscript = " + smalltext);
                            break;
                        case SUPERSCRIPT:
                            System.out.println("smalltext, superscript = " + smalltext);
                            break;
                        default:
                            // Any other alignment value is not reported by this demo.
                            break;
                    }
                }
            }
        }
    }
}