如何在多层次上使用itext读取PDF中的书签?

时间:2012-09-17 13:30:17

标签: java pdf itext

我正在使用iText-Java在书签级别拆分PDF。 有没有人知道或有任何在2级或3级书签中拆分PDF的例子? 例如:我有以下级别的书签:


父亲
| - 儿子
| - 儿子
| - 女儿
| - | - 孙子
| - | - 孙女

现在我有以下代码,用来读取第一级书签(父亲)。基本上,SimpleBookmark.getBookmark(reader) 这一行完成了所有工作。

但我想阅读第2级和第3级书签,以分割这些内部级书签之间的内容。

/**
 * Splits a PDF into one output file per top-level bookmark.
 *
 * <p>Each bookmark's section runs from its own start page up to (but not
 * including) the start page of the next top-level bookmark; the last section
 * runs to the end of the document. Nested bookmarks are not visited here.
 *
 * <p>The output file name is the third space-separated token of the bookmark
 * title plus ".pdf", so titles are assumed to contain at least three tokens.
 *
 * @param pdf          path of the PDF to split
 * @param outputFolder prefix prepended to each output file name
 */
public static void splitPDFByBookmarks(String pdf, String outputFolder){ 
        PdfReader reader = null; 
        try
        { 
            reader = new PdfReader(pdf); 
            //List of bookmarks: each bookmark is a map with values for title, page, etc 
            List<HashMap> bookmarks = SimpleBookmark.getBookmark(reader); 
            if (bookmarks == null) { 
                // getBookmark returns null when the document has no outline tree 
                log.debug("No bookmarks found in " + pdf); 
                return; 
            } 
            for(int i=0; i<bookmarks.size(); i++){ 
                HashMap bm = bookmarks.get(i); 
                HashMap nextBM = i==bookmarks.size()-1 ? null : bookmarks.get(i+1); 
                //In my case I needed to split the title string 
                String title = ((String)bm.get("Title")).split(" ")[2]; 

                log.debug("Titel: " + title); 
                // "Page" looks like "<number> <fit type> ..."; only the number is needed 
                String startPage = ((String)bm.get("Page")).split(" ")[0]; 
                // The last bookmark extends to one past the final page (exclusive upper bound) 
                String startPageNextBM = nextBM==null ? "" + (reader.getNumberOfPages() + 1) : ((String)nextBM.get("Page")).split(" ")[0]; 
                log.debug("Page: " + startPage); 
                log.debug("------------------"); 
                extractBookmarkToPDF(reader, Integer.valueOf(startPage), Integer.valueOf(startPageNextBM), title + ".pdf",outputFolder); 
            } 
        } 
        catch (IOException e) 
        { 
            // Pass the exception itself so the stack trace is not lost 
            log.error(e.getMessage(), e); 
        } 
        finally 
        { 
            // Release the file handle held by the reader once all sections are written 
            if (reader != null) { 
                reader.close(); 
            } 
        } 
    } 

    /**
     * Copies the pages {@code pageFrom} (inclusive) to {@code pageTo} (exclusive)
     * of {@code reader} into a new PDF written to {@code outputFolder + outputName}.
     *
     * <p>An empty range is skipped without creating a file. Failures are logged
     * and not propagated to the caller.
     *
     * @param reader       source document
     * @param pageFrom     first page to copy (1-based, inclusive)
     * @param pageTo       page at which to stop (exclusive)
     * @param outputName   file name of the PDF to create
     * @param outputFolder prefix prepended to {@code outputName}; no separator is inserted
     */
    private static void extractBookmarkToPDF(PdfReader reader, int pageFrom, int pageTo, String outputName, String outputFolder){ 
        if (pageFrom >= pageTo) { 
            // Nothing to copy; closing a document without pages would fail and leave a broken file behind 
            log.error("Empty page range " + pageFrom + ".." + pageTo + " for " + outputName + ", nothing written"); 
            return; 
        } 

        Document document = new Document(); 
        OutputStream os = null; 

        try{ 
            os = new FileOutputStream(outputFolder + outputName); 

            // Create a writer for the outputstream 
            PdfWriter writer = PdfWriter.getInstance(document, os); 
            document.open(); 
            PdfContentByte cb = writer.getDirectContent(); // Holds the PDF data 

            for (int pageNumber = pageFrom; pageNumber < pageTo; pageNumber++) { 
                document.newPage(); 
                PdfImportedPage page = writer.getImportedPage(reader, pageNumber); 
                cb.addTemplate(page, 0, 0); 
            } 

            // Closing the document finishes the PDF structure 
            document.close(); 
        }catch(Exception ex){ 
            log.error(ex.getMessage(), ex); 
        }finally { 
            // Guard the document close so a failure here cannot skip closing the stream 
            try { 
                if (document.isOpen()) 
                    document.close(); 
            } catch (RuntimeException closeFailure) { 
                log.error(closeFailure.getMessage(), closeFailure); 
            } 
            try { 
                if (os != null) 
                    os.close(); 
            } catch (IOException ioe) { 
                log.error(ioe.getMessage(), ioe); 
            } 
        } 
    } 

非常感谢您的帮助。 提前致谢! :)

2 个答案:

答案 0 :(得分:0)

当你调用 SimpleBookmark.getBookmark(reader); 时,你会得到一个 ArrayList<HashMap>(如有需要,可以进行强制类型转换)。尝试迭代该 ArrayList 并查看其结构。如果书签有子书签(如你所说),它将包含另一个具有相同结构的列表。

递归方法可能是解决方案。

答案 1 :(得分:0)

供使用 iText 7 的读者参考:

/**
 * Walks the children of {@code outline} and records title and page number of
 * the bookmarks found, by delegating to {@code prepareIndexFile}.
 *
 * <p>A child without a destination is not recorded itself, but its own
 * children are still visited.
 *
 * @param outline     outline node whose children are traversed; {@code null} is a no-op
 * @param names       named destinations, keyed by destination name
 * @param pdfDocument document the outlines belong to
 * @param titles      receives the bookmark titles
 * @param pageNum     receives the page number for each recorded title
 */
public void walkOutlines(PdfOutline outline, Map<String, PdfObject> names, PdfDocument pdfDocument,List<String>titles,List<Integer>pageNum) {
    if (outline == null) {
        // No outline tree to traverse
        return;
    }

    for (PdfOutline child : outline.getAllChildren()) {
        if (child.getDestination() != null) {
            // prepareIndexFile records this entry and recurses into its children
            prepareIndexFile(child, names, pdfDocument, titles, pageNum);
        } else {
            // Grouping entry without a target: skip it but keep descending
            walkOutlines(child, names, pdfDocument, titles, pageNum);
        }
    }
}

// ----- Resolve the page number of an outline (bookmark) entry

 /**
  * Records the title and page number of {@code outline} and then does the same,
  * recursively, for all of its children (pre-order).
  *
  * <p>Both named destinations (a string looked up in {@code names}) and explicit
  * destinations (an array whose first element is the page) are handled. An
  * entry whose page cannot be resolved is not recorded, so {@code titles} and
  * {@code pageNum} always stay the same length; its children are still visited.
  *
  * @param outline     outline entry to record
  * @param names       named destinations, keyed by destination name
  * @param pdfDocument document used to translate a page dictionary to a page number
  * @param titles      receives the bookmark titles
  * @param pageNum     receives the page number for each recorded title
  */
 public void prepareIndexFile(PdfOutline outline, Map<String, PdfObject> names, PdfDocument pdfDocument,List<String>titles,List<Integer>pageNum) {

    PdfDestination pdfDestination = outline.getDestination();
    PdfObject target = pdfDestination != null ? pdfDestination.getPdfObject() : null;

    // A named destination is stored as a string; resolve it through the name map
    if (target instanceof PdfString) {
        target = names.get(((PdfString) target).toUnicodeString());
    }

    // The destination array is expected to carry the page as its first element
    PdfObject pageObj = null;
    if (target instanceof PdfArray && ((PdfArray) target).size() > 0) {
        pageObj = ((PdfArray) target).get(0);
    }

    if (pageObj instanceof PdfDictionary) {
        int pageNumber = pdfDocument.getPageNumber((PdfDictionary) pageObj);
        // Non-positive results are treated as "page not found in this document"
        if (pageNumber > 0) {
            titles.add(outline.getTitle());
            pageNum.add(pageNumber);
        }
    }

    for (PdfOutline child : outline.getAllChildren()) {
        prepareIndexFile(child, names, pdfDocument, titles, pageNum);
    }

 }

 /**
  * Splits {@code inputFile} into one PDF per bookmark, at every bookmark level.
  *
  * <p>Bookmarks are collected with {@code walkOutlines}. Each section starts at
  * its bookmark's page and ends on the page before the next bookmark's page
  * (or on its own start page when the next bookmark does not point further
  * ahead); the last section runs to the end of the document. Output files are
  * named after the bookmark title via {@code getFileName}.
  *
  * @param inputFile    path of the PDF to split
  * @param outputFolder directory that receives the generated files
  * @return {@code true} if the document had bookmarks and every section was
  *         written; {@code false} if there were none or an error occurred
  */
 public boolean splitPdf(String inputFile, final String outputFolder) {

    boolean splitSuccess = true;
    PdfDocument pdfDoc = null;
    try {
        PdfReader pdfReaderNew = new PdfReader(inputFile);
        pdfDoc = new PdfDocument(pdfReaderNew);

        final List<String> titles = new ArrayList<String>();
        List<Integer> pageNum = new ArrayList<Integer>();

        // Named destinations, needed to resolve bookmarks that refer to a name
        PdfNameTree destsTree = pdfDoc.getCatalog().getNameTree(PdfName.Dests);
        Map<String, PdfObject> names = destsTree.getNames();
        PdfOutline root = pdfDoc.getOutlines(false);

        // Collect bookmark titles and their page numbers
        if (root != null) {
            walkOutlines(root, names, pdfDoc, titles, pageNum);
        }

        if (titles.isEmpty()) {
            splitSuccess = false;
        } else {
            for (int i = 0; i < titles.size(); i++) {

                String title = titles.get(i);
                int startPage = pageNum.get(i);

                int endPage;
                if (i == titles.size() - 1) {
                    endPage = pdfDoc.getNumberOfPages();
                } else {
                    int nextPage = pageNum.get(i + 1);
                    // Never let the range end before it starts: fall back to a single page
                    endPage = nextPage > startPage ? nextPage - 1 : startPage;
                }

                String outFileName = outputFolder + File.separator + getFileName(title) + ".pdf";
                PdfWriter pdfWriter = new PdfWriter(outFileName);
                try {
                    PdfDocument newDocument = new PdfDocument(pdfWriter, new DocumentProperties().setEventCountingMetaInfo(null));
                    try {
                        pdfDoc.copyPagesTo(startPage, endPage, newDocument);
                    } finally {
                        newDocument.close();
                    }
                } finally {
                    // Also covers the case where creating the target document failed
                    pdfWriter.close();
                }
            }
        }
    } catch (Exception e) {
        // TODO: report the failure through the project's logger
        splitSuccess = false;
    } finally {
        // Release the source document (and its reader) on every path
        if (pdfDoc != null) {
            pdfDoc.close();
        }
    }
    return splitSuccess;
 }