使用iText库提取目录,章节和索引的内容

时间:2014-01-31 09:06:14

标签: itext information-extraction

有人可以建议我如何使用iText库,以编程方式从电子书(PDF)中提取目录、章节和索引的内容吗?

1 个答案:

答案 0 :(得分:0)

我通过解析书签来实现这一目标。下面是我使用iText从PDF中解析书签的代码。

/**
 * Walks a list of bookmark entries and records each entry's title and page
 * number in {@code this.lhmBookMarks}, descending into child bookmarks up to
 * the requested depth.
 *
 * <p>Each bookmark is a map read through three keys:
 * <ul>
 *   <li>{@code "Title"} - stored as the key; an absent or null title becomes
 *       the empty string.</li>
 *   <li>{@code "Page"} - the first space-separated token is parsed as the
 *       page number. An absent, null or unparseable value is stored as
 *       {@code 0}.</li>
 *   <li>{@code "Kids"} - a nested list of bookmarks in the same format.</li>
 * </ul>
 *
 * <p>A malformed entry is reported on {@code System.err} and skipped over;
 * it does not stop the remaining bookmarks from being processed.
 *
 * <p>NOTE(review): entries are stored with {@code put(title, page)}; if
 * {@code lhmBookMarks} is a {@code Map}, bookmarks sharing a title overwrite
 * each other - confirm this is acceptable.
 *
 * @param listBookmarks bookmarks to process; {@code null} is treated as empty
 * @param intLevel      depth limit, must not be {@code null}. {@code 999}
 *                      means "all levels". Otherwise the value is decremented
 *                      once per call and children are visited while the
 *                      decremented value is non-zero, so {@code 1} processes
 *                      only this list and {@code 2} also its direct children.
 *                      NOTE(review): a value of 0 or less never reaches zero
 *                      after the decrement and therefore behaves like
 *                      "all levels" - confirm whether that is intended.
 */
public void ParseBookMarkToLevel(List<HashMap<String,Object>> listBookmarks,Integer   intLevel )
{
    if (listBookmarks == null) {
        return;
    }

    // 999 is the "unlimited depth" sentinel and is passed down unchanged;
    // any other value loses one level per recursion step.
    final int childLevel = (intLevel.intValue() == 999) ? 999 : intLevel.intValue() - 1;

    for (HashMap<String,Object> bookmark : listBookmarks) {
        if (bookmark == null) {
            continue;
        }

        Object titleEntry = bookmark.get("Title");
        String title = (titleEntry != null) ? titleEntry.toString() : "";

        // 0 marks a bookmark without a usable page reference.
        Integer pageNumber = 0;
        Object pageEntry = bookmark.get("Page");
        if (pageEntry != null) {
            // Trimming first guarantees split() returns at least one element.
            String pageToken = pageEntry.toString().trim().split(" ")[0];
            try {
                pageNumber = Integer.valueOf(pageToken);
            }
            catch (NumberFormatException e) {
                // Keep going: one bad entry must not discard the rest.
                System.err.println("Unparseable page \"" + pageEntry
                        + "\" for bookmark \"" + title + "\": " + e);
            }
        }
        this.lhmBookMarks.put(title, pageNumber);

        Object kidsEntry = bookmark.get("Kids");
        if (childLevel != 0 && kidsEntry instanceof List) {
            // The element type cannot be checked at runtime; the nested list
            // is assumed to use the same map format as the top-level list.
            @SuppressWarnings("unchecked")
            List<HashMap<String,Object>> kids = (List<HashMap<String,Object>>) kidsEntry;
            this.ParseBookMarkToLevel(kids, childLevel);
        }
    }
}