如何遍历PDF的COSObject?

时间:2019-07-12 16:34:40

标签: java pdfbox

我有一个PDF(https://www.dropbox.com/s/g2wqm8ipsp8t8l5/GSA%20500%20PDF_v4.pdf?dl=0),我正在尝试查找它是否包含附加到ImageField的图片。为此,我使用PDFDebugger检查了PDF,并可以看到具有类似以下内容的PDF层次结构:/ page / Annots / AP / N / Resources / XObject / FRM / Resources / XObject / lm0以获取图片。我尝试了一些代码,如下所示,但是我无法超越N / Resources,因为从资源开始,它不是COSDictionary。有什么想法吗?

File pdfFile = new File("C:\\Users\\balana.ITGFIRM\\Desktop\\GSA 500 PDF\\GSA 500 PDF_v4.pdf");
        String filePath = pdfFile.getParent() + System.getProperty("file.separator");
        document = PDDocument.load(pdfFile);

        PDDocumentCatalog pdfCatalog = document.getDocumentCatalog();
        PDAcroForm acroForm = pdfCatalog.getAcroForm();
        List<PDPage> pages = document.getDocumentCatalog().getAllPages();
        for (int i = 0; i < pages.size(); i++) {

            PDPage page = pages.get(i);
            COSBase test = pages.get(i).getCOSObject();
            // System.out.println("page: " + i + " base: "+ test);
            if (page != null && (i == 2 || i == 3 || i == 4)) {
                if (page.getAnnotations() != null) {
                    if (page.getAnnotations().get(i) != null) {
                        List<PDAnnotation> annots = page.getAnnotations();
                        for (int k = 0; k < annots.size(); k++) {
                            PDAnnotation annot = annots.get(k);
                            // String annotationName = annot.getAnnotationName();
                            // System.out.println("annotation name: "+ annotationName);
                            if (annot == null) {
                                return;
                            } else {
                                PDAppearanceDictionary appear = annot.getAppearance();
                                if (appear == null) {
                                    return;
                                } else {
                                    Map<String, PDAppearanceStream> mapAppear = appear.getNormalAppearance();
                                    // System.out.println("Map appear: "+ mapAppear);
                                    if (mapAppear == null) {
                                        return;
                                    } else {
                                        for (Map.Entry<String, PDAppearanceStream> ent : mapAppear.entrySet()) {
                                            PDAppearanceStream pd = ent.getValue();
                                            if (pd == null) {
                                                return;
                                            } else {
                                                PDResources res = pd.getResources();
                                                Map<String, PDXObject> xObjects = res.getXObjects();
                                                if (xObjects == null)
                                                    return;
                                                for (Map.Entry<String, PDXObject> e : xObjects.entrySet()) {
                                                    PDXObject xObject = e.getValue();
                                                    if (xObject instanceof PDXObjectImage) {
                                                        PDXObjectImage image = (PDXObjectImage) xObject;
                                                        String suffix = image.getSuffix();
                                                        System.out.println("suffix: " + suffix);
                                                    }
                                                }
                                            }

                                        }
                                    }

                                }

                            }

                            // System.out.println("appear: "+ appear);
                            // System.out.println("index: " + k + " annot : "+ annot);
                            // COSBase annotationBase = annot.getDictionary();
                            // System.out.println("annot base: "+ annotationBase);
                            // System.out.println("\n\n");
                            // COSBase apDict = ((COSDictionary)annotationBase).getItem(COSName.AP);
                            // System.out.println("annot AP: "+ apDict);
                            // System.out.println("\n\n");
                            // COSBase nDict = ((COSDictionary)apDict).getItem(COSName.N);
                            // System.out.println("annot N: "+ nDict);
                            // System.out.println("\n\n");
                            // COSBase appearance = annot.getAppearance().getDictionary();
                            // System.out.println("appearance : "+ appearance);
                            // System.out.println("\n\n");
                            // COSBase resources = ((COSDictionary)appearance).getItem(COSName.RESOURCES);
                            // System.out.println("resources: "+ resources);
                            // System.out.println("\n\n");
                            // COSDictionary annotBaseTemp= (COSDictionary) annot.getCOSObject();
                            // COSBase xObject = ((COSDictionary)annotBaseTemp).getItem(COSName.XOBJECT);
                            // System.out.println("xObject: "+ xObject);
                            // System.out.println("contains: "+ contains);
                            System.out.println("\n\n");
                        }

                        if (page.getAnnotations().get(i).getCOSObject() != null) {
                            COSBase base = page.getAnnotations().get(i).getCOSObject();
                            if (base != null) {
                                COSBase apDict = ((COSDictionary) base).getItem(COSName.AP);
                                if (apDict != null) {
                                    COSBase nDict = ((COSDictionary) apDict).getItem(COSName.N);
                                    if (nDict != null) {
                                        COSBase resourcesDict = nDict.getCOSObject();
                                        // COSBase resourcesDict =
                                        // ((COSDictionary)nDict).getItem(COSName.RESOURCES);
                                        // COSBase xObject =
                                        // ((COSDictionary)resourcesDict).getItem(COSName.XOBJECT);
                                        // COSBase lm0Dict = ((COSDictionary)xObject).getItem(COSName.IM);
                                        System.out.println("Page: " + i + " AP:" + apDict);
                                        System.out.println("\n\n");
                                        System.out.println("Page: " + i + " ndict: " + nDict);
                                        System.out.println("\n\n");
                                        System.out.println("Page: " + i + " resourcesDict: " + resourcesDict);
                                        System.out.println("\n\n");
                                        System.out.println("Page: " + i + " base: " + base);
                                        System.out.println("\n\n");

                                        // System.out.println("xOjbect: "+xObject);
                                    }
                                }

                            }
                        }
                    }
                }

            }

        }

0 个答案:

没有答案