使用PDFBox将彩色PDF转换为黑白(b/w)TIFF

时间:2016-05-04 20:02:50

标签: pdf pdfbox tiff image-conversion

我在将一些彩色PDF转换为TIFF图像时遇到了问题。出问题的PDF中含有用蓝色墨水书写的手写签名,而这些签名不会出现在生成的二值(黑白)TIFF中。我怀疑某处有一个阈值,用来决定哪些像素变为黑色、哪些像素变为白色。

/**
 * Renders every page of a PDF and writes the pages, in order, as one
 * LZW-compressed TIFF image sequence.
 *
 * <p>Each page is rendered at 300 DPI directly as {@code ImageType.BINARY}, and the
 * X/Y resolution tags of every TIFF page are set to the same value.
 *
 * <p>NOTE(review): rendering straight to {@code ImageType.BINARY} is reported to drop
 * light coloured content such as blue-ink signatures; the rendering mode is left
 * unchanged here on purpose.
 *
 * <p>All resources (PDF document, output streams, image writer) are released even when
 * rendering or writing fails. Failures are propagated to the caller without being
 * printed first.
 *
 * @param pdf the PDF file to read
 * @param tif the TIFF file to create or overwrite
 * @throws IllegalStateException if no ImageIO writer is registered for "TIF" or no
 *                               image output stream can be created
 * @throws Exception             if loading, rendering or writing fails
 */
private static void convertPdfToTiff(final File pdf, final File tif) throws Exception {
    // Single source of truth for the render resolution and the TIFF resolution tags.
    final int dpi = 300;

    final Iterator<ImageWriter> imageWriterIterator = ImageIO.getImageWritersByFormatName("TIF");
    if (!imageWriterIterator.hasNext())
    {
        // Fail with a clear message instead of a NullPointerException further down.
        throw new IllegalStateException("No ImageIO writer is registered for format name \"TIF\"");
    }
    final ImageWriter imageWriter = imageWriterIterator.next();

    try
    {
        final TIFFImageWriteParam writeParam = new TIFFImageWriteParam(Locale.getDefault());
        writeParam.setCompressionMode(TIFFImageWriteParam.MODE_EXPLICIT);
        writeParam.setCompressionType("LZW");

        // Resources are closed in reverse order of declaration, so the image stream is
        // flushed into the buffered stream before the file itself is closed.
        try (PDDocument pdfDocument = PDDocument.load(pdf);
             OutputStream out = new FileOutputStream(tif);
             BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(out);
             ImageOutputStream imageOutputStream = ImageIO.createImageOutputStream(bufferedOutputStream))
        {
            if (imageOutputStream == null)
            {
                throw new IllegalStateException("ImageIO could not create an image output stream");
            }

            final PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument);
            imageWriter.setOutput(imageOutputStream);
            imageWriter.prepareWriteSequence(null);

            final int pageCount = pdfDocument.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
            {
                final BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, dpi, ImageType.BINARY);

                // Fresh metadata per page, carrying the resolution the page was rendered at.
                final Vector<BaselineTIFFTagSet> tagSets = new Vector<BaselineTIFFTagSet>();
                tagSets.add(BaselineTIFFTagSet.getInstance());
                final TIFFImageMetadata tiffMetadata = new TIFFImageMetadata(new TIFFIFD(tagSets));
                final TIFFIFD rootIFD = tiffMetadata.getRootIFD();
                final BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));

                imageWriter.writeToSequence(new IIOImage(image, null, tiffMetadata), writeParam);
            }

            // Completes the sequence started by prepareWriteSequence().
            imageWriter.endWriteSequence();
        }
    }
    finally
    {
        imageWriter.dispose();
    }
}

2 个答案:

答案 0 :(得分:2)

前段时间我遇到了同样的问题(蓝色签名),我这样做了:

  • 渲染为RGB
  • 使用JH Labs过滤器转换为黑白(我在this answer中的评论中指出了这一点)
  • 我最初尝试过抖动和扩散滤波器
  • 最适合我的过滤器是gain filterdiffusion filter的偏差部分(我认为我使用了0.3)。
  • 您可以将两个过滤器与compound filter结合使用。
  • JH Labs的代码没有以.jar文件的形式提供,但你可以下载源代码并将其添加到你的项目中
  • some examples

顺便说一句,保存文件时不要使用LZW压缩,而应使用G4压缩,这样文件会更小。PDFBox提供了高效保存图像的方法,请参阅here。如果您的BufferedImage类型为BITONAL,ImageIOUtil.writeImage()会将其保存为G4压缩的TIFF。

答案 1 :(得分:0)

我最终先将页面渲染为灰度图像,再把它绘制到第二个黑白(二值)图像上。

/**
 * Renders every page of a PDF and writes the pages, in order, as one
 * "CCITT T.6"-compressed TIFF image sequence.
 *
 * <p>Each page is first rendered at 300 DPI as {@code ImageType.GRAY} and then drawn onto
 * a second image of type {@code BufferedImage.TYPE_BYTE_BINARY}; that second image is
 * what gets written. The X/Y resolution tags of every TIFF page are set to the render
 * resolution.
 *
 * <p>All resources (PDF document, output streams, graphics context, image writer) are
 * released even when rendering or writing fails. Failures are propagated to the caller
 * without being printed first.
 *
 * @param pdf the PDF file to read
 * @param tif the TIFF file to create or overwrite
 * @throws IllegalStateException if no ImageIO writer is registered for "TIF" or no
 *                               image output stream can be created
 * @throws Exception             if loading, rendering or writing fails
 */
private static void convertPdfToTiff(final File pdf, final File tif) throws Exception {
    // Single source of truth for the render resolution and the TIFF resolution tags.
    final int dpi = 300;

    final Iterator<ImageWriter> imageWriterIterator = ImageIO.getImageWritersByFormatName("TIF");
    if (!imageWriterIterator.hasNext())
    {
        // Fail with a clear message instead of a NullPointerException further down.
        throw new IllegalStateException("No ImageIO writer is registered for format name \"TIF\"");
    }
    final ImageWriter imageWriter = imageWriterIterator.next();

    try
    {
        final TIFFImageWriteParam writeParam = new TIFFImageWriteParam(Locale.getDefault());
        writeParam.setCompressionMode(TIFFImageWriteParam.MODE_EXPLICIT);
        writeParam.setCompressionType("CCITT T.6");

        // Resources are closed in reverse order of declaration, so the image stream is
        // flushed into the buffered stream before the file itself is closed.
        try (PDDocument pdfDocument = PDDocument.load(pdf);
             OutputStream out = new FileOutputStream(tif);
             BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(out);
             ImageOutputStream imageOutputStream = ImageIO.createImageOutputStream(bufferedOutputStream))
        {
            if (imageOutputStream == null)
            {
                throw new IllegalStateException("ImageIO could not create an image output stream");
            }

            final PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument);
            imageWriter.setOutput(imageOutputStream);
            imageWriter.prepareWriteSequence(null);

            final int pageCount = pdfDocument.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
            {
                // Render in grayscale first, then let Java2D reduce it to a 1-bit image.
                final BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, dpi, ImageType.GRAY);
                final BufferedImage image2 = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
                final Graphics2D g = image2.createGraphics();
                try
                {
                    g.drawRenderedImage(image, null);
                }
                finally
                {
                    g.dispose();
                }

                // Fresh metadata per page, carrying the resolution the page was rendered at.
                final Vector<BaselineTIFFTagSet> tagSets = new Vector<BaselineTIFFTagSet>();
                tagSets.add(BaselineTIFFTagSet.getInstance());
                final TIFFImageMetadata tiffMetadata = new TIFFImageMetadata(new TIFFIFD(tagSets));
                final TIFFIFD rootIFD = tiffMetadata.getRootIFD();
                final BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));
                rootIFD.addTIFFField(new TIFFField(base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION), TIFFTag.TIFF_RATIONAL, 1, new long[][] { { dpi, 1 } }));

                imageWriter.writeToSequence(new IIOImage(image2, null, tiffMetadata), writeParam);
            }

            // Completes the sequence started by prepareWriteSequence().
            imageWriter.endWriteSequence();
        }
    }
    finally
    {
        imageWriter.dispose();
    }
}