使用iText将PDF按页拆分为多个PDF,但每个拆分文件的大小都与原文件相同

时间:2020-01-17 19:54:17

标签: java pdf split itext

这是我的代码,用于将单个PDF按页拆分为多个PDF(每页一个文件):

/**
 * Splits a PDF into one single-page PDF per page, written as
 * {@code <targetDir>/<pageNumber>.pdf}.
 *
 * @param file      stream containing the source PDF
 * @param targetDir directory that receives the per-page files
 * @return {@code "from 01 to <n>"} where n is the page count, or {@code null}
 *         if reading the source or writing a page failed
 */
public static String splitAndRenamePdf(InputStream file, String targetDir) {
        PdfReader reader = null;
        try {
            reader = new PdfReader(file);
            return splitAndRenamePdf(reader, targetDir);
        } catch (IOException exc) {
            System.out.println("splitAndRenamePdf Exception: "+exc.getMessage());
            return null;
        } finally {
            // This method created the reader, so it is responsible for releasing it.
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Splits an already opened reader into one single-page PDF per page, written as
     * {@code <targetDir>/<pageNumber>.pdf}. The reader is not closed; it stays owned
     * by the caller.
     *
     * @param reader    reader positioned on the source PDF
     * @param targetDir directory that receives the per-page files
     * @return {@code "from 01 to <n>"} where n is the page count, or {@code null}
     *         if writing a page failed
     */
    public static String splitAndRenamePdf(PdfReader reader, String targetDir) {
        try {
            int n = reader.getNumberOfPages();
            for (int i = 1; i <= n; i++) {
                writeSinglePage(reader, i, targetDir+File.separatorChar+i+".pdf");
            }
            return "from 01 to "+n;
        } catch (IOException | DocumentException exc) {
            System.out.println("splitAndRenamePdf Exception: "+exc.getMessage());
            return null;
        }
    }

    /** Copies one page of {@code reader} into a new PDF file at {@code path}. */
    private static void writeSinglePage(PdfReader reader, int pageNumber, String path)
            throws IOException, DocumentException {
        Document document = new Document(reader.getPageSizeWithRotation(pageNumber));
        // try-with-resources guarantees the file handle is released even when the copy
        // fails part-way; closing the stream a second time after a successful
        // document.close() is harmless.
        try (FileOutputStream out = new FileOutputStream(path)) {
            PdfCopy writer = new PdfCopy(document, out);
            document.open();
            PdfImportedPage page = writer.getImportedPage(reader, pageNumber);
            writer.addPage(page);
            document.close();
            writer.close();
        }
    }

每个文件的内容都是正确的,但是生成的n个文件中,每一个的大小都与原始文件相同。

有人可以帮助我吗?我也可以换用其他库,因为我并不是非用iText不可。

1 个答案:

答案 0 :(得分:0)

我自己写出了解决方案……希望它能对其他人有所帮助。思路是:在拆分之前,先把每一页资源字典中未被该页内容流实际使用(即没有被"Do"操作符引用)的XObject去掉,然后再移除未使用的对象。

/** Render listener whose callbacks all do nothing. */
private static final RenderListener nopListener = new RenderListener() {
        @Override
        public void beginTextBlock() {
            // intentionally empty
        }

        @Override
        public void endTextBlock() {
            // intentionally empty
        }

        @Override
        public void renderText(TextRenderInfo renderInfo) {
            // intentionally empty
        }

        @Override
        public void renderImage(ImageRenderInfo renderInfo) {
            // intentionally empty
        }
    };

static class Do implements ContentOperator {
        public void invoke(PdfContentStreamProcessor processor, PdfLiteral operator, ArrayList<PdfObject> operands) {
            PdfName xobjectName = (PdfName)operands.get(0);
            names.add(xobjectName);
        }

        final List<PdfName> names = new ArrayList<>();
    }

/**
 * Rewrites every page's resource dictionary so that its /XObject entry lists only the
 * XObjects named by "Do" operators in that page's content stream, then asks the reader
 * to remove objects that are no longer used.
 *
 * <p>Pages without a resource dictionary are reported on standard output and left
 * untouched.
 *
 * @param reader reader whose pages are modified in place
 * @throws IOException if a page's content stream cannot be read or processed
 */
private static void fixPdfReader(PdfReader reader) throws IOException {
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(nopListener);
        Do doOp = new Do();
        processor.registerContentOperator("Do", doOp);
        int totPages = reader.getNumberOfPages();
        for (int page = 1; page <= totPages; page++) {
            PdfDictionary resources = reader.getPageResources(page);
            if (resources == null) {
                System.out.printf("!!! page %d has no resources\n", page);
                continue;
            }
            // The operator instance is shared across pages, so start each page empty.
            doOp.names.clear();
            processor.processContent(ContentByteUtils.getContentBytesForPage(reader, page), resources);

            // Work on a copy so the original resource dictionary object is not modified.
            PdfDictionary newResources = new PdfDictionary();
            newResources.putAll(resources);

            // May be null when the page's resources have no /XObject entry.
            PdfDictionary xobjects = newResources.getAsDict(PdfName.XOBJECT);
            PdfDictionary newXobjects = new PdfDictionary();
            for (PdfName key : doOp.names) {
                // Guard against a content stream that invokes a name the resources do
                // not define; previously a missing /XObject dictionary caused an NPE here.
                PdfObject xobject = (xobjects == null) ? null : xobjects.get(key);
                if (xobject != null) {
                    newXobjects.put(key, xobject);
                }
            }
            newResources.put(PdfName.XOBJECT, newXobjects);
            reader.getPageN(page).put(PdfName.RESOURCES, newResources);
        }
        reader.removeUnusedObjects();
    }

/**
 * Reads a PDF, prunes each page's resources with {@code fixPdfReader}, and then splits
 * the result into per-page files under {@code targetDir}.
 *
 * @param inputStream stream containing the source PDF
 * @param targetDir   directory that receives the per-page files
 * @return whatever {@code splitAndRenamePdf} returns, or {@code null} if the PDF could
 *         not be read or fixed
 */
public static String fixAndSplitPDF(InputStream inputStream, String targetDir) {
        PdfReader reader = null;
        try {
            reader = new PdfReader(inputStream);
            fixPdfReader(reader);
            // NOTE(review): this call needs a splitAndRenamePdf variant that accepts a
            // PdfReader; confirm one exists alongside the InputStream variant.
            return splitAndRenamePdf(reader, targetDir);
        } catch (IOException exc) {
            // Report the failure instead of dropping it silently.
            System.out.println("fixAndSplitPDF Exception: "+exc.getMessage());
            return null;
        } finally {
            // The reader was created here, so release it here on every path.
            if (reader != null) {
                reader.close();
            }
        }
    }