我想从某些PDF文件中导出一些图像,为此我应该使用PdfSharp库。 我在Web上找到了从一个PDF导出图像文件的代码,但如果图像是由DCTDecode编写的,我没有任何问题。如果图像是由FlatDecode模式编码,我无法导出此图像。
所以这就是代码:
static void Main(string[] args)
{
//estrapolare immagine da pdf
const string filename = "d://eresult.pdf";
PdfDocument document = PdfReader.Open(filename);
int imageCount = 0;
// Iterate pages
foreach (PdfPage page in document.Pages)
{
// Get resources dictionary
PdfDictionary resources = page.Elements.GetDictionary("/Resources");
if (resources != null)
{
// Get external objects dictionary
PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");
if (xObjects != null)
{
ICollection<PdfItem> items = xObjects.Elements.Values;
// Iterate references to external objects
foreach (PdfItem item in items)
{
PdfReference reference = item as PdfReference;
if (reference != null)
{
PdfDictionary xObject = reference.Value as PdfDictionary;
// Is external object an image?
if (xObject != null && xObject.Elements.GetString("/Subtype") == "/Image")
{
ExportImage(xObject, ref imageCount);
}
}
}
}
}
}
System.Diagnostics.Debug.Write(imageCount + " images exported.", "Export Images");
}
static void ExportImage(PdfDictionary image, ref int count)
{
string filter = image.Elements.GetName("/Filter");
switch (filter)
{
case "/DCTDecode":
ExportJpegImage(image, ref count);
break;
case "/FlateDecode":
ExportAsPngImage(image, ref count);
break;
}
}
static void ExportJpegImage(PdfDictionary image, ref int count)
{
// Fortunately JPEG has native support in PDF and exporting an image is just writing the stream to a file.
byte[] stream = image.Stream.Value;
FileStream fs = new FileStream(String.Format("Image{0}.jpeg", count++), FileMode.Create, FileAccess.Write);
BinaryWriter bw = new BinaryWriter(fs);
bw.Write(stream);
bw.Close();
}
static void ExportAsPngImage(PdfDictionary image, ref int count)
{
int width = image.Elements.GetInteger(PdfImage.Keys.Width);
int height = image.Elements.GetInteger(PdfImage.Keys.Height);
int bitsPerComponent = image.Elements.GetInteger(PdfImage.Keys.BitsPerComponent);
PdfSharp.Pdf.Filters.FlateDecode flate = new PdfSharp.Pdf.Filters.FlateDecode();
byte[] decodedBytes = flate.Decode(image.Stream.Value);
System.Drawing.Imaging.PixelFormat pixelFormat;
switch (bitsPerComponent)
{
case 1:
pixelFormat = PixelFormat.Format1bppIndexed;
break;
case 8:
pixelFormat = PixelFormat.Format8bppIndexed;
break;
case 24:
pixelFormat = PixelFormat.Format24bppRgb;
break;
default:
throw new Exception("Unknown pixel format " + bitsPerComponent);
}
Bitmap bmp = new Bitmap(width, height, pixelFormat);
var bmpData = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, pixelFormat);
int length = (int)Math.Ceiling(width * bitsPerComponent / 8.0);
for (int i = 0; i < height; i++)
{
int offset = i * length;
int scanOffset = i * bmpData.Stride;
Marshal.Copy(decodedBytes, offset, new IntPtr(bmpData.Scan0.ToInt32() + scanOffset), length);
}
bmp.UnlockBits(bmpData);
using (FileStream fs = new FileStream(@"D:\\" + String.Format("Image{0}.png", count++), FileMode.Create, FileAccess.Write))
{
bmp.Save(fs, System.Drawing.Imaging.ImageFormat.Png);
}
// TODO: You can put the code here that converts vom PDF internal image format to a Windows bitmap
// and use GDI+ to save it in PNG format.
// It is the work of a day or two for the most important formats. Take a look at the file
// PdfSharp.Pdf.Advanced/PdfImage.cs to see how we create the PDF image formats.
// We don't need that feature at the moment and therefore will not implement it.
// If you write the code for exporting images I would be pleased to publish it in a future release
// of PDFsharp.
}
使用此代码,我可以在这种奇怪模式下看到图像:
但是Pdf文件中的图像是:
如您所见,颜色太不相同
答案 0 :(得分:0)
图像数据和调色板是PDF文件中的不同对象。图像可以有掩模,这些也可以是不同的对象。
将图像数据保存到PNG文件时,您可能还需要获取调色板并将颜色数据包含在PNG文件中。
也许PDFsharp论坛上显示的代码比代码更好:
http://forum.pdfsharp.net/viewtopic.php?p=6755#p6755