使用 PDFBox 将 PDF 转换为图像 byte[]

时间:2013-02-22 21:03:05

标签: java pdfbox

使用 PDFBox,能否将 PDF(或 PDF 的 byte[])转换为图像的 byte[]?我在网上查了几个例子,但找到的例子都只说明了如何把转换结果直接写入文件系统,或者转换为 Java AWT 对象。

我不想为此付出额外的 IO 开销:先把图像文件写入文件系统,再读回 byte[],最后再把文件删除。

所以我可以这样做:

// Renders pages 1 through 2 of the classpath resource example.pdf with
// PDFImageWriter. Per the surrounding question, writeImage() sends its output
// to the file system (using outputPrefix) instead of returning the bytes.
String destinationImageFormat = "jpg";
int resolution = 256;
String password = "";
String outputPrefix = "myImageFile";
boolean success = false;

// try-with-resources closes the stream even if loading or rendering throws.
try (InputStream is = getClass().getClassLoader().getResourceAsStream("example.pdf")) {
    PDDocument pdf = PDDocument.load( is, true );
    try {
        PDFImageWriter imageWriter = new PDFImageWriter();

        success = imageWriter.writeImage(pdf,
                            destinationImageFormat,
                            password,
                            1,   // start page
                            2,   // end page
                            outputPrefix,
                            BufferedImage.TYPE_INT_RGB,
                            resolution);
    } finally {
        // Release the document whether or not the conversion succeeded.
        pdf.close();
    }
}

以及:

// Loads example.pdf from the classpath and converts each page to a BufferedImage.
// try-with-resources closes the stream even if loading or conversion throws.
try (InputStream is = getClass().getClassLoader().getResourceAsStream("example.pdf")) {
    PDDocument pdf = PDDocument.load( is, true );
    try {
        List<PDPage> pages = pdf.getDocumentCatalog().getAllPages();

        for ( PDPage page : pages )
        {
            // One in-memory image per page; it is not used any further here.
            BufferedImage image = page.convertToImage();
        }
    } finally {
        // Release the document whether or not every page could be converted.
        pdf.close();
    }
}

我不清楚如何将 BufferedImage 转换为 byte[]。我知道在 imageWriter.writeImage() 内部,图像最终会被写入文件输出流,但我不清楚这部分 API 是如何工作的。

3 个答案:

答案 0 :(得分:11)

您可以使用ImageIO.write写入OutputStream。要获取byte [],请使用ByteArrayOutputStream,然后在其上调用toByteArray()。

答案 1 :(得分:0)

 // Converts the PDF configured in PdfInfo to images through PDFImageWriter.
 // The JVM exits with status 2 for an unrecognised color name and with
 // status 1 when writeImage() reports failure.
 try {
     PDDocument document = PDDocument.load(PdfInfo.getPDFWAY());
     try {
         if (document.isEncrypted()) {
             document.decrypt(PdfInfo.getPASSWORD());
         }

         // Translate the configured color name into the BufferedImage type constant.
         if ("bilevel".equalsIgnoreCase(PdfInfo.getCOLOR())) {
             PdfInfo.setIMAGETYPE( BufferedImage.TYPE_BYTE_BINARY);
         } else if ("indexed".equalsIgnoreCase(PdfInfo.getCOLOR())) {
             PdfInfo.setIMAGETYPE(BufferedImage.TYPE_BYTE_INDEXED);
         } else if ("gray".equalsIgnoreCase(PdfInfo.getCOLOR())) {
             PdfInfo.setIMAGETYPE(BufferedImage.TYPE_BYTE_GRAY);
         } else if ("rgb".equalsIgnoreCase(PdfInfo.getCOLOR())) {
             PdfInfo.setIMAGETYPE(BufferedImage.TYPE_INT_RGB);
         } else if ("rgba".equalsIgnoreCase(PdfInfo.getCOLOR())) {
             PdfInfo.setIMAGETYPE(BufferedImage.TYPE_INT_ARGB);
         } else {
             // System.exit() ends the JVM right away; the finally block below does not run.
             System.exit(2);
         }

         PDFImageWriter imageWriter = new PDFImageWriter();
         boolean success = imageWriter.writeImage(document, PdfInfo.getIMAGE_FORMAT(), PdfInfo.getPASSWORD(),
                 PdfInfo.getSTART_PAGE(), PdfInfo.getEND_PAGE(), PdfInfo.getOUTPUT_PREFIX(),
                 PdfInfo.getIMAGETYPE(), PdfInfo.getRESOLUTION());
         if (!success) {
             System.exit(1);
         }
     } finally {
         // Close the document even when decryption or rendering throws,
         // so a failed conversion does not leak it.
         document.close();
     }
 } catch (IOException | CryptographyException | InvalidPasswordException ex) {
     Logger.getLogger(PdfToImae.class.getName()).log(Level.SEVERE, null, ex);
 }
/**
 * Static holder for the settings of a PDF-to-image conversion.
 *
 * <p>All state is kept in static fields and exposed through static accessors.
 * The accessor names keep the upper-case field names (for example
 * {@code getPDFWAY()}, {@code setIMAGETYPE(int)}) because that is how the
 * conversion code calls them.
 */
public class PdfInfo {
    /** Location of the source PDF; handed to {@code PDDocument.load}. */
    private static String PDFWAY;
    /** Output prefix handed to {@code PDFImageWriter.writeImage}. */
    private static String OUTPUT_PREFIX;
    /** Password used to decrypt the document and passed on to the image writer. */
    private static String PASSWORD;
    /** First page to convert. */
    private static int START_PAGE=1;
    /** Last page to convert. */
    private static int END_PAGE=Integer.MAX_VALUE;
    /** Image format name passed to the image writer. */
    private static String IMAGE_FORMAT="jpg";
    /** Color name: one of "bilevel", "indexed", "gray", "rgb" or "rgba" (compared case-insensitively). */
    private static String COLOR="rgb";
    /** Resolution passed to the image writer. */
    private static int RESOLUTION=256;
    /** Image type; the conversion code replaces this with a {@code BufferedImage.TYPE_*} constant. */
    private static int IMAGETYPE=24;
    /** Not referenced by the conversion code shown alongside this class. */
    private static String filename;
    /** Not referenced by the conversion code shown alongside this class. */
    private static String filePath="";

    /** Returns the location of the source PDF. */
    public static String getPDFWAY() {
        return PDFWAY;
    }

    /** Sets the location of the source PDF. */
    public static void setPDFWAY(String pdfWay) {
        PDFWAY = pdfWay;
    }

    /** Returns the output prefix. */
    public static String getOUTPUT_PREFIX() {
        return OUTPUT_PREFIX;
    }

    /** Sets the output prefix. */
    public static void setOUTPUT_PREFIX(String outputPrefix) {
        OUTPUT_PREFIX = outputPrefix;
    }

    /** Returns the document password. */
    public static String getPASSWORD() {
        return PASSWORD;
    }

    /** Sets the document password. */
    public static void setPASSWORD(String password) {
        PASSWORD = password;
    }

    /** Returns the first page to convert. */
    public static int getSTART_PAGE() {
        return START_PAGE;
    }

    /** Sets the first page to convert. */
    public static void setSTART_PAGE(int startPage) {
        START_PAGE = startPage;
    }

    /** Returns the last page to convert. */
    public static int getEND_PAGE() {
        return END_PAGE;
    }

    /** Sets the last page to convert. */
    public static void setEND_PAGE(int endPage) {
        END_PAGE = endPage;
    }

    /** Returns the image format name. */
    public static String getIMAGE_FORMAT() {
        return IMAGE_FORMAT;
    }

    /** Sets the image format name. */
    public static void setIMAGE_FORMAT(String imageFormat) {
        IMAGE_FORMAT = imageFormat;
    }

    /** Returns the color name. */
    public static String getCOLOR() {
        return COLOR;
    }

    /** Sets the color name. */
    public static void setCOLOR(String color) {
        COLOR = color;
    }

    /** Returns the resolution. */
    public static int getRESOLUTION() {
        return RESOLUTION;
    }

    /** Sets the resolution. */
    public static void setRESOLUTION(int resolution) {
        RESOLUTION = resolution;
    }

    /** Returns the image type. */
    public static int getIMAGETYPE() {
        return IMAGETYPE;
    }

    /** Sets the image type. */
    public static void setIMAGETYPE(int imageType) {
        IMAGETYPE = imageType;
    }

    /** Returns the file name. */
    public static String getFilename() {
        return filename;
    }

    /** Sets the file name. */
    public static void setFilename(String name) {
        filename = name;
    }

    /** Returns the file path. */
    public static String getFilePath() {
        return filePath;
    }

    /** Sets the file path. */
    public static void setFilePath(String path) {
        filePath = path;
    }
}

答案 2 :(得分:0)

添加maven依赖:

    <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
    <!-- Apache PDFBox: supplies the PDDocument and PDFRenderer classes imported by the Java code below. -->
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>2.0.1</version>
    </dependency>

然后,将pdf转换为图像:

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;

/**
 * Renders every page of a PDF to its own PNG file.
 *
 * <p>Page N (1-based) is rendered at 96 DPI and written to
 * {@code <path of the PDF>.N.png}.
 *
 * @param filePath path of the PDF to render
 * @return the names of the PNG files that were written, in page order
 * @throws IOException if the PDF cannot be loaded or a PNG cannot be written
 */
private List<String> savePDF(String filePath) throws IOException {
    List<String> result = Lists.newArrayList();

    File file = new File(filePath);

    // try-with-resources closes the document even if rendering or writing fails.
    try (PDDocument doc = PDDocument.load(file)) {
        PDFRenderer renderer = new PDFRenderer(doc);

        int pageSize = doc.getNumberOfPages();
        for (int i = 0; i < pageSize; i++) {
            String pngFileName = file.getPath() + "." + (i + 1) + ".png";

            // Close each output file as soon as its page is written, also on failure.
            try (FileOutputStream out = new FileOutputStream(pngFileName)) {
                ImageIO.write(renderer.renderImageWithDPI(i, 96), "png", out);
            }

            result.add(pngFileName);
        }
    }
    return result;
}

编辑:

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;

/**
 * Renders every page of a PDF to PNG-encoded bytes held in memory.
 *
 * <p>Each page is rendered at 96 DPI and encoded into a
 * {@code ByteArrayOutputStream}; no image file is written. The returned names
 * ({@code <path of the PDF>.N.png}, N being the 1-based page number) only
 * label the pages.
 *
 * @param filePath path of the PDF to render
 * @return one generated name per rendered page, in page order
 * @throws IOException if the PDF cannot be loaded or a page cannot be encoded
 */
private List<String> savePDF(String filePath) throws IOException {
    List<String> result = Lists.newArrayList();

    File file = new File(filePath);

    // try-with-resources closes the document even if rendering fails.
    try (PDDocument doc = PDDocument.load(file)) {
        PDFRenderer renderer = new PDFRenderer(doc);

        int pageSize = doc.getNumberOfPages();
        for (int i = 0; i < pageSize; i++) {
            String pngFileName = file.getPath() + "." + (i + 1) + ".png";

            // ByteArrayOutputStream has no String constructor; it is an in-memory
            // buffer and needs no file name. Closing it has no effect, so no
            // explicit close is required.
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            ImageIO.write(renderer.renderImageWithDPI(i, 96), "png", out);

            // The PNG-encoded page; use it here (store it, send it, ...).
            byte[] pngBytes = out.toByteArray();

            result.add(pngFileName);
        }
    }
    return result;
}