Java MS Word和PDF转换为图像(png / jpg)

时间:2015-03-04 04:52:49

标签: java image-processing

我正在寻找一个免费的库,用于将 MS Word、WordPerfect 和 PDF 文件转换为图像。有人知道好用且较新的 Java 库吗?

2 个答案:

答案 0 :(得分:4)

将PDF转换为图片您可以使用 PDFbox

以下是使用 PDFBox API 将 PDF 转换为图像的代码:
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.imageio.ImageIO;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageNode;


/**
 * Renders each page of a PDF file to a JPEG image and returns the names of
 * the files that were written.
 *
 * <p>Images are written to the hard-coded {@code imagePath} directory and are
 * named {@code 0.jpg}, {@code 1.jpg}, ... in page order.
 *
 * @param pdfFile path of the PDF file to load
 * @return the file names (without directory) of the generated images, in page order
 * @throws IOException if the PDF cannot be loaded or an image cannot be written
 */
public List<String> generateImages(String pdfFile) throws IOException {

    String imagePath = "/Users/$user/pdfimages/";
    List<String> fileNames = new ArrayList<String>();

    PDDocument document = PDDocument.load(pdfFile);
    try {
        // getAllPages() returns a flat list of PDPage objects; iterating
        // getKids() of the root node can also yield nested PDPageNode
        // entries, which would fail the cast to PDPage.
        @SuppressWarnings("unchecked")
        List<PDPage> pages = document.getDocumentCatalog().getAllPages();

        int count = 0;
        for (PDPage page : pages) {
            // 128 is passed as the rendering resolution argument.
            BufferedImage img = page.convertToImage(BufferedImage.TYPE_INT_RGB, 128);
            File imageFile = new File(imagePath + count++ + ".jpg");
            ImageIO.write(img, "jpg", imageFile);
            fileNames.add(imageFile.getName());
        }
    } finally {
        // Always release the document, even when rendering or writing fails.
        document.close();
    }
    return fileNames;
}

还可以使用另一个库 Apache POI 将 PowerPoint(PPT)演示文稿转换为图像

以下是代码示例

import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.usermodel.SlideShow;

public class JavaApplication12 {

public static void main(String[] args) throws FileNotFoundException, IOException {
FileInputStream is = new FileInputStream(“D:/Presentation1.ppt”);
SlideShow ppt = new SlideShow(is);
is.close();

Dimension pgsize = ppt.getPageSize();

Slide[] slide = ppt.getSlides();
for (int i = 0; i < slide.length; i++) {

BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, 1);

Graphics2D graphics = img.createGraphics();
graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
RenderingHints.VALUE_INTERPOLATION_BICUBIC);
graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS,
RenderingHints.VALUE_FRACTIONALMETRICS_ON);

graphics.setColor(Color.white);
graphics.clearRect(0, 0, pgsize.width, pgsize.height);
graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));

// render
slide[i].draw(graphics);

// save the output
FileOutputStream out = new FileOutputStream(“slide-” + (i + 1) + “.png”);
javax.imageio.ImageIO.write(img, “png”, out);
out.close();
}
}
}

要将 MS Word 转换为图片,可以参考这个已发布的问题(here),其中使用了 JODConverter

JODConverter自动完成OpenOffice.org支持的所有转换,包括

  • 任何格式到PDF   o OpenDocument(文本,电子表格,演示文稿)到PDF   o Word到PDF; Excel到PDF; PowerPoint到PDF   o RTF到PDF; WordPerfect到PDF; ...
  • 以及更多   o OpenDocument演示文稿(odp)到Flash; PowerPoint到Flash   o RTF到OpenDocument; WordPerfect到OpenDocument   o 任何格式到HTML(有限制)   o 支持OpenOffice.org 1.0和旧的StarOffice格式

答案 1 :(得分:0)

//DOC to .jpeg

package org.doc;

import java.io.File;

import com.aspose.words.Document;
import com.aspose.words.ImageSaveOptions;
import com.aspose.words.SaveFormat;

/**
 * Saves each page of a Word document as a separate JPEG image using
 * Aspose.Words.
 */
public class DocToImage {

    /**
     * Loads {@code D://G.doc} and saves one JPEG per page. Any exception is
     * caught and its stack trace printed; "Done..." is printed only when every
     * page has been saved.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        try {
            Document wordDocument = new Document("D://G.doc");

            // The same options object is reused for every page; only the
            // page index changes between saves.
            ImageSaveOptions jpegOptions = new ImageSaveOptions(SaveFormat.JPEG);
            jpegOptions.setJpegQuality(100);
            jpegOptions.setResolution(100);
            jpegOptions.setUseHighQualityRendering(true);

            int pageIndex = 0;
            while (pageIndex < wordDocument.getPageCount()) {
                jpegOptions.setPageIndex(pageIndex);
                wordDocument.save(outputPathFor(pageIndex), jpegOptions);
                pageIndex++;
            }

            System.out.println("Done...");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds the output image path for the page at the given index. */
    private static String outputPathFor(int pageIndex) {
        return "E://" + "images" + File.separator + "img_" + pageIndex + ".jpeg";
    }

}


/*Here is the link from where we can download latest Aspose word jar.
http://www.aspose.com/java/word-component.aspx*/


//PDF to .png

package org.pdf;

import java.awt.Graphics;
import java.awt.GraphicsConfiguration;
import java.awt.GraphicsDevice;
import java.awt.GraphicsEnvironment;
import java.awt.HeadlessException;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.Transparency;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

import javax.imageio.ImageIO;
import javax.swing.ImageIcon;

import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;

/**
 * Renders every page of a PDF file to a PNG image using the PDFRenderer
 * library ({@code com.sun.pdfview}).
 */
public class PdfToImage {

    /**
     * Reads {@code E://dd.pdf} and writes one PNG per page into the image
     * save directory, named {@code page_0.png}, {@code page_1.png}, ...
     *
     * @param args unused
     * @throws Exception if the PDF cannot be read or an image cannot be written
     */
    public static void main(final String[] args) throws Exception {

        String storagePath = "E://dd.pdf";

        //Image Save Directory
        String realPathtopdfImageSaveDir = "E://uploads/";

        RandomAccessFile raf = new RandomAccessFile(storagePath, "r");
        try {
            FileChannel channel = raf.getChannel();
            ByteBuffer buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
            PDFFile pdffile = new PDFFile(buf);

            int numPgs = pdffile.getNumPages();

            // PDFFile numbers pages starting at 1 (asking for page 0 also
            // yields the first page), so iterating 0..numPgs-1 would render
            // page 1 twice and never reach the last page.
            // NOTE(review): confirm against the pdf-renderer version in use.
            for (int pageNumber = 1; pageNumber <= numPgs; pageNumber++) {
                PDFPage page = pdffile.getPage(pageNumber);

                Rectangle rect = new Rectangle(0, 0,
                        (int) page.getBBox().getWidth(), (int) page.getBBox().getHeight());

                Image img = page.getImage(rect.width, rect.height, rect, null, true, true);

                // save it as a file; file names stay 0-based as before
                BufferedImage bImg = toBufferedImage(img);
                File yourImageFile = new File(realPathtopdfImageSaveDir + File.separator
                        + "page_" + (pageNumber - 1) + ".png");

                ImageIO.write(bImg, "png", yourImageFile);
            }
        } finally {
            // Closing the file also closes its channel; previously it was leaked.
            raf.close();
        }

    }

    /**
     * Converts an arbitrary {@link Image} to a {@link BufferedImage}.
     *
     * <p>If the image already is a {@code BufferedImage} it is returned as is.
     * Otherwise an opaque image compatible with the default screen is created,
     * falling back to {@code TYPE_INT_RGB} when no screen is available, and the
     * source image is drawn onto it.
     *
     * @param image the image to convert
     * @return a buffered image with the same pixel dimensions as {@code image}
     */
    public static BufferedImage toBufferedImage(Image image) {
        if (image instanceof BufferedImage) {
            return (BufferedImage) image;
        }

        // Wrapping in an ImageIcon forces the image to be fully loaded,
        // so getWidth/getHeight below return real dimensions.
        image = new ImageIcon(image).getImage();

        BufferedImage bimage = null;
        GraphicsEnvironment ge = GraphicsEnvironment.getLocalGraphicsEnvironment();
        try {
            int transparency = Transparency.OPAQUE;

            GraphicsDevice gs = ge.getDefaultScreenDevice();
            GraphicsConfiguration gc = gs.getDefaultConfiguration();
            bimage = gc.createCompatibleImage(image.getWidth(null), image.getHeight(null), transparency);
        } catch (HeadlessException e) {
            // No display available; fall through to the generic RGB image below.
            System.out.println("The system does not have a screen");
        }

        if (bimage == null) {
            int type = BufferedImage.TYPE_INT_RGB;
            bimage = new BufferedImage(image.getWidth(null), image.getHeight(null), type);
        }

        Graphics g = bimage.createGraphics();

        g.drawImage(image, 0, 0, null);
        g.dispose();

        return bimage;
    }
}

// jar required pdf-renderer.jar