我正在编写一个Java应用程序来充当模板读写器。我在处理文本方面取得了成功,但对图像有一些不足......
使用扩展PDFStreamEngine的类
获取图像非常简单package readingPdf;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;
public class ImageStripper extends PDFStreamEngine {
ArrayList<Object []> imagesData = null;
public ImageStripper() throws IOException {
// preparing PDFStreamEngine
addOperator(new Concatenate());
addOperator(new DrawObject());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new SetMatrix());
imagesData = new ArrayList<Object[]>();
}
@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
String operation = operator.getName();
if ("Do".equals(operation)) {
COSName objectName = (COSName) operands.get(0);
// get the PDF object
PDXObject xobject = getResources().getXObject(objectName);
// check if the object is an image object
if (xobject instanceof PDImageXObject) {
Object[] imageData = new Object[3];
PDImageXObject image = (PDImageXObject) xobject;
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
// position of image in the pdf in terms of user space units
System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY()
+ " in user space units");
imageData[0] = ctmNew.getTranslateX();// xPos
imageData[1] = ctmNew.getTranslateY();// yPos
imageData[2] = image;//Image
imagesData.add(imageData);
} else if (xobject instanceof PDFormXObject) {
PDFormXObject form = (PDFormXObject) xobject;
showForm(form);
}
} else {
super.processOperator(operator, operands);
}
}
public ArrayList<Object[]> getImagesList(){
return imagesData;
}
}
接下来是其实施
public class PDFManager{
private PDFParser parser;
private PDDocument pdDoc;
private PDDocument retDoc;
private COSDocument cosDoc;
private PDPage page;
private String filePath;
private File file;
public PDDocument transferImage() throws IOException {
this.pdDoc = null;
this.cosDoc = null;
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file, "r"));
parser.parse();
cosDoc = parser.getDocument();
pdDoc = new PDDocument(cosDoc);
//Get Image Data
ImageStripper imageStripper = new ImageStripper();
imageStripper.processPage(pdDoc.getPage(0));
ArrayList<Object []> imageList = imageStripper.getImagesList();
//Close Doc
pdDoc.close();
cosDoc.close();
//Create new PDF Doc
retDoc = new PDDocument();
page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
retDoc.addPage(page);
PDPageContentStream cs = new PDPageContentStream(retDoc, page, AppendMode.OVERWRITE, true);
for(int pos = 0; pos < imageList.size() ; pos++) {
Object [] imageData = imageList.get(pos);
float xPos = (float)imageData[0];
float yPos = (float)imageData[1];
PDImageXObject image = (PDImageXObject)imageData[2];
cs.drawImage(image, xPos, yPos);
}
cs.close();
return retDoc;
}
public static void main(String[] args) throws IOException {
PDFManager pdfManager = new PDFManager();
PDDocument doc =pdfManager.ToText("c:\\test\\test.pdf");
doc.save("c:\\test\\test2.pdf");
doc.close();
}
}
现在问题就出现在我写cs.drawImage
的时候。除了尝试保存新文件外,所有代码都没有任何问题执行...我得到例外COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?
我怀疑仍有元数据将图像链接到原始文档,因为调用PDImageXobject.createFromFile("c:\\test\\testImage.png", doc)
会返回一个新的PDImageXObject
实例,它可以完美地写入。当写入的PDDocument
被传递到PDImageXObject
时,我怀疑它会以某种方式链接。
我无法将图像保存到临时位置,因为这只是测试POC。
任何帮助将不胜感激
答案 0 :(得分:1)
@ Tilman Hausherr
感谢您的解决方案
我将原始文档的结束移动到一个单独的方法,我在写完文件后调用了
public void closeFiles(){
pdDoc.close();
cosDoc.close();
}