如何在文本文件中获取带书签的表格和行?

时间:2016-05-13 04:57:00

标签: java docx4j

我的docx文件包含以下字符串

"My name is santhanam" "I'm from India" "I love docx4j"

我分别用书签名 para0、para1、para2 为上述三个段落添加了书签。我需要把结果输出到一个文本文件中,内容为以下字符串

 <para0>My name is santhanam</para0>
 <para1>I'm from India</para1>
 <para2>I love docx4j</para2>

我已经成功完成了以下代码。

/**
 * Dumps the text of every bookmarked paragraph of a .docx file into a UTF-8
 * text file, one paragraph per line, wrapped in tags named after the bookmark
 * (for example {@code <para0>...</para0>}).
 */
public class GetBookMark {

    /** Only bookmarks whose name starts with this prefix are exported. */
    private static final String PARAGRAPH_PREFIX = "para";

    /**
     * Loads {@code bookmark.docx}, looks up the paragraph of every bookmark
     * whose name starts with {@code para}, and writes the tagged paragraph
     * texts to {@code 5.txt}.
     *
     * @param args unused
     * @throws Exception if the document cannot be loaded or queried
     */
    public static void main(String[] args) throws Exception {
        String outputfilepath = "5.txt";
        String inputfilepath = "bookmark.docx";

        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(inputfilepath));
        MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
        GetBookMark gb = new GetBookMark();

        // Collect every CTBookmark reachable from the document content.
        ClassFinder finder = new ClassFinder(CTBookmark.class);
        new TraversalUtil(documentPart.getContent(), finder);

        StringBuilder docString = new StringBuilder();
        for (Object o : finder.results) {
            String bookmarkName = ((CTBookmark) o).getName();
            if (bookmarkName == null || !bookmarkName.startsWith(PARAGRAPH_PREFIX)) {
                continue;
            }
            P p = gb.findBookmarkedParagraphInMainDocumentPart(bookmarkName, documentPart);
            if (p == null) {
                // The bookmark is not a direct child of a paragraph; nothing to export.
                continue;
            }

            // Build the tagged line without touching the Text nodes themselves:
            // mutating them would duplicate tags when one paragraph carries
            // several matching bookmarks.
            List<Object> texts = getAllElementFromObject(p, Text.class);
            if (!texts.isEmpty()) {
                docString.append('<').append(bookmarkName).append('>');
                for (Object text : texts) {
                    docString.append(((Text) text).getValue());
                }
                docString.append("</").append(bookmarkName).append('>');
            }
            docString.append("\r\n");
        }

        // try-with-resources closes the writer even when write() fails.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outputfilepath), "UTF-8"))) {
            bw.write(docString.toString());
        } catch (java.io.IOException e) {
            System.out.println("Exception while writing to file : " + e);
        }
    }

    /**
     * Recursively collects every object of exactly the given class found in
     * {@code obj} or, when {@code obj} is a {@code ContentAccessor}, in its
     * content. {@code JAXBElement} wrappers are unwrapped first.
     *
     * @param obj      root of the search; {@code null} yields an empty list
     * @param toSearch class the collected objects must have (exact match, not subclasses)
     * @return the matching objects in traversal order, never {@code null}
     */
    public static List<Object> getAllElementFromObject(Object obj, Class<?> toSearch) {
        List<Object> result = new ArrayList<Object>();
        if (obj instanceof JAXBElement) {
            obj = ((JAXBElement<?>) obj).getValue();
        }
        if (obj == null) {
            return result;
        }

        if (obj.getClass().equals(toSearch)) {
            result.add(obj);
        } else if (obj instanceof ContentAccessor) {
            for (Object child : ((ContentAccessor) obj).getContent()) {
                result.addAll(getAllElementFromObject(child, toSearch));
            }
        }
        return result;
    }

    /**
     * Finds the paragraph that directly contains the {@code w:bookmarkStart}
     * with the given name, using an XPath query on the main document part.
     *
     * @param name         bookmark name to look for
     * @param documentPart part to query
     * @return the parent paragraph of the first matching bookmark, or
     *         {@code null} when no bookmark matches, when its parent is not a
     *         paragraph, or when the name cannot be quoted in an XPath literal
     */
    private P findBookmarkedParagraphInMainDocumentPart(String name, MainDocumentPart documentPart)
            throws JAXBException, Docx4JException {
        if (name == null) {
            return null;
        }
        // Pick a quote character the name does not contain, so that the name
        // cannot terminate the XPath string literal early.
        final String literal;
        if (name.indexOf('\'') < 0) {
            literal = "'" + name + "'";
        } else if (name.indexOf('"') < 0) {
            literal = "\"" + name + "\"";
        } else {
            return null;
        }

        final String xpath = "//w:bookmarkStart[@w:name=" + literal + "]/..";
        List<Object> objects = documentPart.getJAXBNodesViaXPath(xpath, false);
        if (objects == null || objects.isEmpty()) {
            return null;
        }
        Object parent = XmlUtils.unwrap(objects.get(0));
        return (parent instanceof P) ? (P) parent : null;
    }

    /**
     * Same lookup as {@link #findBookmarkedParagraphInMainDocumentPart}, but by
     * manual traversal, for parts where the XPath lookup is not used.
     *
     * @param parent   root object to search below
     * @param bookmark bookmark name, compared case-insensitively
     * @return the paragraph containing the bookmark, or {@code null} if none
     */
    private P findBookmarkedParagraphInPart(Object parent, String bookmark) {
        return traversePartForBookmark(parent, bookmark);
    }

    /**
     * Depth-first search used by {@link #findBookmarkedParagraphInPart}.
     *
     * @return the first paragraph that has a direct {@code CTBookmark} child
     *         with the given name (ignoring case), or {@code null}
     */
    private P traversePartForBookmark(Object parent, String bookmark) {
        List<?> children = TraversalUtil.getChildrenImpl(parent);
        if (children == null) {
            return null;
        }
        for (Object child : children) {
            Object o = XmlUtils.unwrap(child);
            if (o instanceof CTBookmark) {
                String name = ((CTBookmark) o).getName();
                // Compare both sides case-insensitively; lower-casing only the
                // stored name would never match a mixed-case argument.
                if (name != null && name.equalsIgnoreCase(bookmark) && parent instanceof P) {
                    return (P) parent; // the surrounding paragraph is what is interesting
                }
            }
            P found = traversePartForBookmark(o, bookmark);
            if (found != null) {
                return found;
            }
        }
        return null;
    }
}

现在我的 docx 文件中还包含表格:我为表格(书签名 table0、table1……)、表格行(Tr)和单元格(Tc)也都添加了书签。是否可以得到如下形式的输出:

 <table0><row0><cell0><para0>text string</para0><para1>text string</para1></cell0></row0>
 <row1><cell1><para2>text string</para2><para3>text string</para3></cell1></row1></table0>

提前致谢。

更新:目前我只完成了一半,使用的是以下代码

 public class GetBookMark {



public static void main(String[] args) throws Exception {

    String docString = "";
    String outputfilepath = "BMChapter 14.txt";
    String inputfilepath = "BMTable.docx";
    String rowbm = null;
    String tblbm = null;
    String parabm = null;
    String cellbm = null;
    String tblparabm = null;
    List<Object> tblTexts = null;
    String partDocString = null;
    String prtblbm = null;
    String prrowbm = null;
    String prcellbm = null;

    WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(inputfilepath));
    MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();

    GetBookMark gb = new GetBookMark();

    ClassFinder finder = new ClassFinder(CTBookmark.class); // <----- change
    new TraversalUtil(documentPart.getContent(), finder);

    for (Object o : finder.results) {
        CTBookmark BookMkStart = (CTBookmark) o;
        String BookMarkName = BookMkStart.getName();

        if (BookMarkName.startsWith("para")) {
            P p = gb.findBookmarkedParagraphInMainDocumentPart(BookMarkName, documentPart);

            List<Object> texts = getAllElementFromObject(p, Text.class);

            if (texts.size() == 0) {

            } else {
                Text t1st = (Text) texts.get(0);
                t1st.setValue("<" + BookMarkName + ">" + t1st.getValue());
                Text tLast = (Text) texts.get(texts.size() - 1);
                tLast.setValue(tLast.getValue() + "</" + BookMarkName + ">");
            }

            for (Object o1 : texts) {
                Text t = (Text) o1;
                docString += t.getValue();
            }
            docString += "\r\n";

        } else {
            if (BookMarkName.startsWith("table")) {
                // rowbm = "</"+BookMarkName+">";
                // tblbm = "<"+BookMarkName+">";
                tblbm = BookMarkName;
            }
            if (BookMarkName.startsWith("row")) {
                // rowbm = "</"+BookMarkName+">" +rowbm;
                // tblbm +="<"+BookMarkName+">";
                rowbm = BookMarkName;

            }
            if (BookMarkName.startsWith("cell")) {
                // rowbm = "</"+BookMarkName+">" +rowbm;
                // tblbm+="<"+BookMarkName+">";
                cellbm = BookMarkName;
            }
            if (BookMarkName.startsWith("tble")) {
                // rowbm = "</"+BookMarkName+">" +rowbm;
                // tblbm+="<"+BookMarkName+">";
                tblparabm = BookMarkName;
                P p = gb.findBookmarkedParagraphInMainDocumentPart(BookMarkName, documentPart);
                List<Object> texts = getAllElementFromObject(p, Text.class);

                if (texts.size() == 0) {

                } else {

                    if (prtblbm != tblbm) {
                        docString += "<" + tblbm + ">";
                    }
                    if (prrowbm != rowbm) {

                        docString += "<" + rowbm + ">";
                    }
                    if (prcellbm != cellbm) {

                        docString += "<" + cellbm + ">";
                    }

                                        Text t1st = (Text) texts.get(0);
                    t1st.setValue("<" + tblparabm + ">" + t1st.getValue());

                    Text tLast = (Text) texts.get(texts.size() - 1);
                    tLast.setValue(tLast.getValue() + "</" + tblparabm + ">");
                    } 



                    prtblbm = tblbm;
                    prrowbm = rowbm;
                    prcellbm = cellbm;
                }

                for (Object o1 : texts) {
                    Text t = (Text) o1;
                    docString += t.getValue();
                }
                docString += "\r\n";

            }
        }
  try {

            Writer writer = new OutputStreamWriter(new FileOutputStream(outputfilepath), "UTF-8");
            BufferedWriter bw = new BufferedWriter(writer);
            bw.write(docString);

            bw.close();
        } catch (Exception e) {
            System.out.println("Exception while writing to file : " + e);
        }

     }
      System.out.println(docString);
     } 

0 个答案:

没有答案