我编写了一个方法,用来检查给定 PDF 中所有嵌入图像的分辨率是否至少达到 300 PPI(即满足印刷要求)。它会遍历页面上的每个图像,并将图像的宽度和高度与 artbox 的宽度和高度进行比较。如果每页只有一个图像,它可以正常工作;但当页面上有多个图像时,artbox 的大小涵盖了所有图像,导致计算结果失准。
希望有人知道如何获取图像实际绘制所在矩形的大小,以便我进行正确的比较;或者是否有更简单的方法来获取图像对象的 PPI(指图像在该矩形中呈现时的 PPI,而不是其原始形式的 PPI)。
这是上述方法的代码
/// <summary>
/// Checks whether every image XObject reachable from the given page dictionary has
/// enough pixels to cover the page's art box at 300 PPI.
/// </summary>
/// <remarks>
/// This is a heuristic: each image is compared against the whole reference box, not
/// against the rectangle the image is actually drawn in, so a page that places several
/// smaller images can be reported as low resolution even when each image is fine.
/// The reference box is the ArtBox when present, otherwise the CropBox, otherwise the
/// MediaBox. If none can be read the method returns true (nothing to compare against).
/// </remarks>
/// <param name="pg">The page dictionary to inspect.</param>
/// <returns>false as soon as one image is smaller than the 300 PPI threshold; otherwise true.</returns>
private static bool AreImages300PPI(PdfDictionary pg)
{
    // ArtBox is optional in a page dictionary, so fall back to the other page boxes.
    PdfArray box = pg.GetAsArray(PdfName.ARTBOX)
        ?? pg.GetAsArray(PdfName.CROPBOX)
        ?? pg.GetAsArray(PdfName.MEDIABOX);
    if (box == null || box.Size < 4) return true;

    PdfNumber llx = box.GetAsNumber(0);
    PdfNumber lly = box.GetAsNumber(1);
    PdfNumber urx = box.GetAsNumber(2);
    PdfNumber ury = box.GetAsNumber(3);
    if (llx == null || lly == null || urx == null || ury == null) return true;

    var pdfRect = new PdfRectangle(llx.FloatValue, lly.FloatValue, urx.FloatValue, ury.FloatValue);

    // The box is measured in 1/72 inch units; convert it to the pixel count needed at 300 PPI.
    float minWidth = pdfRect.Width * 300 / 72;
    float minHeight = pdfRect.Height * 300 / 72;

    return AllImagesAtLeast(pg, minWidth, minHeight);
}

/// <summary>
/// Walks the XObject resources of <paramref name="container"/> (a page or a form XObject)
/// and verifies that every image has at least the given pixel dimensions.
/// </summary>
/// <param name="container">Dictionary whose /Resources /XObject entries are inspected.</param>
/// <param name="minWidth">Minimum acceptable image width in pixels.</param>
/// <param name="minHeight">Minimum acceptable image height in pixels.</param>
/// <returns>false if any image (directly or inside a nested form) is too small; otherwise true.</returns>
private static bool AllImagesAtLeast(PdfDictionary container, float minWidth, float minHeight)
{
    PdfDictionary res = container.GetAsDict(PdfName.RESOURCES);
    if (res == null) return true;
    PdfDictionary xobj = res.GetAsDict(PdfName.XOBJECT);
    if (xobj == null) return true;

    foreach (PdfName name in xobj.Keys)
    {
        PdfObject obj = xobj.Get(name);
        if (obj == null || !obj.IsIndirect()) continue;

        var tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
        if (tg == null) continue;

        PdfName type = tg.GetAsName(PdfName.SUBTYPE);
        if (PdfName.IMAGE.Equals(type))
        {
            // Only image XObjects carry Width/Height; read them as numbers rather than
            // round-tripping through culture-sensitive string parsing.
            PdfNumber width = tg.GetAsNumber(PdfName.WIDTH);
            PdfNumber height = tg.GetAsNumber(PdfName.HEIGHT);
            if (width == null || height == null) continue;
            if (width.FloatValue < minWidth || height.FloatValue < minHeight)
            {
                return false;
            }
        }
        else if (PdfName.FORM.Equals(type) || PdfName.GROUP.Equals(type))
        {
            // Nested content is checked against the page's thresholds; the nested
            // dictionary itself has no page boxes to read.
            if (!AllImagesAtLeast(tg, minWidth, minHeight))
            {
                return false;
            }
        }
    }
    return true;
}
供参考,以下是调用它的方法:
/// <summary>
/// Opens the PDF at the given path and collects the numbers of all pages for which
/// AreImages300PPI reports false.
/// </summary>
/// <param name="MergedPDFPath">Path of the PDF file to inspect.</param>
/// <returns>The 1-based page numbers, as strings, of the pages that failed the check.</returns>
internal static List<string> GetLowResWarnings(string MergedPDFPath)
{
    var lowResPages = new List<string>();
    using (var reader = new PdfReader(MergedPDFPath))
    {
        // PDF page numbers are 1-based.
        int current = 1;
        while (current <= reader.NumberOfPages)
        {
            PdfDictionary pageDict = reader.GetPageN(current);
            bool pageIsFine = AreImages300PPI(pageDict);
            if (!pageIsFine)
            {
                lowResPages.Add(current.ToString());
            }
            current++;
        }
    }
    return lowResPages;
}
感谢您提供的任何帮助。
答案 0 :(得分:4)
我可以建议您换一条完全不同的思路吗?您现在查看的是存放在文件全局资源中的图像,却没有考虑它们在页面中的实际使用方式。
iTextSharp 有一个名为 iTextSharp.text.pdf.parser.PdfReaderContentParser 的类,它可以遍历 PdfReader 并向您报告其中的内容。您可以通过实现 iTextSharp.text.pdf.parser.IRenderListener 接口来订阅这些信息。每遇到一个图像,就会以一个 iTextSharp.text.pdf.parser.ImageRenderInfo 对象为参数调用您的类的 RenderImage 方法。通过该对象,您既可以获得实际的图像,也可以获得当前变换矩阵(CTM),该矩阵会告诉您图像是如何放置到文档中的。
使用此信息,您可以创建如下类:
/// <summary>
/// Render listener that records, for every image encountered while parsing page content,
/// the image's pixel size and the size at which it is drawn on the page.
/// </summary>
/// <remarks>
/// The caller must set <see cref="CurrentPage"/> and <see cref="CurrentPageUnits"/> before
/// processing each page; the listener has no other way of knowing which page is being parsed.
/// </remarks>
public class MyImageRenderListener : iTextSharp.text.pdf.parser.IRenderListener {
    //For each page keep a list of various image info
    public Dictionary<int, List<ImageScaleInfo>> Pages = new Dictionary<int, List<ImageScaleInfo>>();

    //Need to manually change the page when using this
    public int CurrentPage { get; set; }

    //Pass through the current page units
    public Single CurrentPageUnits { get; set; }

    //Not used, just interface contracts
    public void BeginTextBlock() { }
    public void EndTextBlock() { }
    public void RenderText(iTextSharp.text.pdf.parser.TextRenderInfo renderInfo) { }

    /// <summary>
    /// Called for each image drawn on the page; stores its pixel size and drawn size
    /// under <see cref="CurrentPage"/>.
    /// </summary>
    /// <param name="renderInfo">Information about the image and its transformation matrix.</param>
    public void RenderImage(iTextSharp.text.pdf.parser.ImageRenderInfo renderInfo) {
        //Get the basic image info; skip images that cannot be obtained or decoded
        var imageObject = renderInfo.GetImage();
        if (imageObject == null) {
            return;
        }
        int imgWidth;
        int imgHeight;
        //The using block releases the decoded bitmap even if reading its size throws
        using (var img = imageObject.GetDrawingImage()) {
            if (img == null) {
                return;
            }
            imgWidth = img.Width;
            imgHeight = img.Height;
        }

        //Get the current transformation matrix. The image occupies the unit square, so its
        //drawn width and height are the lengths of the transformed unit vectors. Reading
        //only I11/I22 would give zero or negative sizes for rotated or flipped images.
        var ctm = renderInfo.GetImageCTM();
        var a = ctm[iTextSharp.text.pdf.parser.Matrix.I11];
        var b = ctm[iTextSharp.text.pdf.parser.Matrix.I12];
        var c = ctm[iTextSharp.text.pdf.parser.Matrix.I21];
        var d = ctm[iTextSharp.text.pdf.parser.Matrix.I22];
        var ctmWidth = (Single)System.Math.Sqrt(a * a + b * b);
        var ctmHeight = (Single)System.Math.Sqrt(c * c + d * d);

        //Create new list for our page number if it doesn't exist already
        List<ImageScaleInfo> pageImages;
        if (!this.Pages.TryGetValue(CurrentPage, out pageImages)) {
            pageImages = new List<ImageScaleInfo>();
            this.Pages.Add(CurrentPage, pageImages);
        }

        //Add our image info to this page
        pageImages.Add(new ImageScaleInfo(imgWidth, imgHeight, ctmWidth, ctmHeight, this.CurrentPageUnits));
    }
}
它使用下面这个辅助类来存储我们的信息:
/// <summary>
/// Holds an image's pixel size, the size it is drawn at on the page, and the page's
/// unit value, and exposes the resulting horizontal and vertical scale ratios.
/// </summary>
public class ImageScaleInfo {
    /// <summary>Stores the two sizes and the page unit value.</summary>
    /// <param name="imgWidth">Image width in pixels.</param>
    /// <param name="imgHeight">Image height in pixels.</param>
    /// <param name="ctmWidth">Width at which the image is placed on the page.</param>
    /// <param name="ctmHeight">Height at which the image is placed on the page.</param>
    /// <param name="pageUnits">The page's unit value.</param>
    public ImageScaleInfo(Single imgWidth, Single imgHeight, Single ctmWidth, Single ctmHeight, Single pageUnits) {
        PageUnits = pageUnits;
        CtmSize = new System.Drawing.SizeF(ctmWidth, ctmHeight);
        ImgSize = new System.Drawing.SizeF(imgWidth, imgHeight);
    }

    /// <summary>The page's unit space, almost always 72.</summary>
    public Single PageUnits { get; set; }

    /// <summary>The image's actual dimensions.</summary>
    public System.Drawing.SizeF ImgSize { get; set; }

    /// <summary>How the image is placed into the page.</summary>
    public System.Drawing.SizeF CtmSize { get; set; }

    /// <summary>Image width divided by placed width.</summary>
    public Single ImgWidthScale {
        get {
            Single pixels = ImgSize.Width;
            Single placed = CtmSize.Width;
            return pixels / placed;
        }
    }

    /// <summary>Image height divided by placed height.</summary>
    public Single ImgHeightScale {
        get {
            Single pixels = ImgSize.Height;
            Single placed = CtmSize.Height;
            return pixels / placed;
        }
    }
}
使用它非常简单:
//Create an instance of our helper class
var imgList = new MyImageRenderListener();

//Parse the PDF and inspect each image
using (var reader = new PdfReader(testFile)) {
    var proc = new iTextSharp.text.pdf.parser.PdfReaderContentParser(reader);
    for (var i = 1; i <= reader.NumberOfPages; i++) {
        //Get the page object itself
        var p = reader.GetPageN(i);

        //Work out how many user-space units make up one inch on this page.
        //Per the PDF spec, UserUnit is the size of one user-space unit in multiples of
        //1/72 inch and defaults to 1.0, so units-per-inch is 72 / UserUnit (72 when absent).
        var userUnit = p.GetAsNumber(PdfName.USERUNIT);
        var pageUnits = (userUnit != null && userUnit.FloatValue > 0) ? 72f / userUnit.FloatValue : 72f;

        //Set the page number so we can find it later
        imgList.CurrentPage = i;
        imgList.CurrentPageUnits = pageUnits;

        //Process the page
        proc.ProcessContent(i, imgList);
    }
}

//Dump out some information
foreach (var page in imgList.Pages) {
    foreach (var info in page.Value) {
        //Pixels per user-space unit times units per inch gives pixels per inch
        Console.WriteLine("Image PPI is {0}x{1}", info.ImgWidthScale * info.PageUnits, info.ImgHeightScale * info.PageUnits);
    }
}
**修改**
根据下面 @BrunoLowagie 的评论,我已更新上面的代码,去掉了“魔术数字 72”,改为实际查询文档以确认该值是否被覆盖。这种情况不太可能发生,但说不定一两年后会有人碰到某个冷门的 PDF,然后抱怨这段代码不好用——有备无患总好过事后后悔。