从 PDF 文件中提取的图像是反转的(负像)。(C#)

时间:2011-02-01 04:01:23

标签: c# vb.net pdf

您好,我正在使用 PDF Clown 库(C#)将 PDF 转换为 XHTML。在提取 PDF 内容时,提取出来的图像是反转的(负像)。我想得到原始图像,请问有什么建议吗?

代码粘贴在下面:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;

using it.stefanochizzolini.clown.documents;
using it.stefanochizzolini.clown.files;
using it.stefanochizzolini.clown.documents.contents;
using it.stefanochizzolini.clown.documents.contents.objects;
using it.stefanochizzolini.clown.tools;
using it.stefanochizzolini.clown.documents.contents.composition;
using it.stefanochizzolini.clown.documents.contents.fonts;

using System.IO;
using it.stefanochizzolini.clown.bytes;
using it.stefanochizzolini.clown.objects;

namespace Test1
{

    public partial class Form1 : Form
    {
        // Counter appended to the names of exported image files; it is post-incremented
        // at every export so that each file gets a different number.
        public int index;
        // Creates the form. InitializeComponent is defined outside this view
        // (presumably in the designer half of this partial class).
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Lets the user pick the source PDF and copies the chosen path into the
        /// file-name text box.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnBrowse_Click(object sender, EventArgs e)
        {
            openFileDialog1.Filter = "PDF Files|*.pdf";

            // Only take over the path when the dialog was confirmed; when the user
            // cancels, whatever is already in the text box must be left untouched.
            if (openFileDialog1.ShowDialog() == DialogResult.OK)
            {
                txtFileName.Text = openFileDialog1.FileName;
            }
        }

        /// <summary>
        /// Re-evaluates whether the Process button may be used: it is enabled only
        /// while both the source file box and the output path box are non-blank.
        /// </summary>
        private void txtFileName_TextChanged(object sender, EventArgs e)
        {
            bool hasSourceFile = txtFileName.Text.Trim().Length > 0;
            bool hasOutputPath = txtOutputPath.Text.Trim().Length > 0;

            btnProcess.Enabled = hasSourceFile && hasOutputPath;
        }

        /// <summary>
        /// Starts the extraction for the file whose (trimmed) path is currently
        /// shown in the file-name text box.
        /// </summary>
        private void btnProcess_Click(object sender, EventArgs e)
        {
            string pdfPath = txtFileName.Text.Trim();
            InitiateProcess(pdfPath);
        }

        /// <summary>
        /// Opens the given PDF, walks all of its pages and feeds each page's
        /// content to <c>Extract</c>, appending a "Scanning page N" header to the
        /// output text box per page.
        /// </summary>
        /// <param name="FileName">Path of the PDF file to process.</param>
        private void InitiateProcess(string FileName)
        {
            it.stefanochizzolini.clown.files.File file = null;
            try
            {
                Document document;
                try
                {
                    // Open the PDF file and get its document.
                    file = new it.stefanochizzolini.clown.files.File(FileName);
                    document = file.Document;
                }
                catch (Exception ex)
                {
                    // Tell the user why the file was rejected instead of hiding the cause.
                    MessageBox.Show("Sorry. Error in File" + Environment.NewLine + ex.Message);
                    return;
                }

                // Page stamper is used to draw contents on existing pages.
                PageStamper stamper = new PageStamper();

                foreach (Page page in document.Pages)
                {
                    txtOutput.Text = txtOutput.Text + Environment.NewLine + Environment.NewLine + "\nScanning page " + (page.Index + 1) + "...\n";

                    stamper.Page = page;

                    // Wraps the page contents into a scanner.
                    Extract(new ContentScanner(page), stamper.Foreground, page);

                    stamper.Flush();

                    // Repaint so the text gathered so far becomes visible between pages.
                    this.Refresh();
                }
            }
            finally
            {
                // Release the underlying file once every page has been scanned
                // (or as soon as opening/scanning fails), not before.
                if (file != null)
                {
                    file.Dispose();
                }
            }
        }

        /// <summary>
        /// Walks one level of a page's content and reports what it finds:
        /// text is appended to the output text box, inline images and
        /// JPEG (DCTDecode) image XObjects are written to the output folder, and
        /// container objects are scanned recursively.
        /// </summary>
        /// <remarks>
        /// Link annotations are not handled here: the scanner yields
        /// <c>ContentObject</c> instances, and the annotation <c>Link</c> type can never
        /// match one, so the former link branch was unreachable and has been removed.
        /// </remarks>
        /// <param name="level">Scanner for the level to walk; <c>null</c> is ignored.</param>
        /// <param name="builder">Not used by this method itself; only handed on to the recursive call.</param>
        /// <param name="page">Page being scanned; used for the report text.</param>
        private void Extract(ContentScanner level, PrimitiveFilter builder, Page page)
        {
            if (level == null)
                return;

            while (level.MoveNext())
            {
                ContentObject content = level.Current;

                if (content is Text)
                {
                    // One output line per text string of the text object.
                    ContentScanner.TextWrapper text = (ContentScanner.TextWrapper)level.CurrentWrapper;
                    foreach (ContentScanner.TextStringWrapper textString in text.TextStrings)
                    {
                        txtOutput.Text += Environment.NewLine + textString.Text;
                    }
                }
                else if (content is ShowText)
                {
                    // Decode the raw string with the font that is current in the graphics state.
                    it.stefanochizzolini.clown.documents.contents.fonts.Font font = level.State.Font;
                    txtOutput.Text += font.Decode(((ShowText)content).Text);
                }
                else if (content is ContainerObject)
                {
                    // Scan the inner level!
                    Extract(level.ChildLevel, builder, page);
                }
                else if (content is it.stefanochizzolini.clown.documents.contents.objects.InlineImage)
                {
                    // NOTE(review): the body is written as-is with a .jpg name without
                    // looking at the inline image's filter — confirm that is intended.
                    ContentScanner.InlineImageWrapper img = (ContentScanner.InlineImageWrapper)level.CurrentWrapper;
                    ExportImage(
                        img.InlineImage.Body.Value,
                        txtOutputPath.Text.Trim() + System.IO.Path.DirectorySeparatorChar + "ImageExtractionSample_" + (index++) + ".jpg"
                        );
                }
                else if (content is it.stefanochizzolini.clown.documents.contents.objects.GraphicsObject)
                {
                    // Only external objects (XObject) are of interest here; inline
                    // images have already been handled by the branch above.
                    ContentScanner.GraphicsObjectWrapper objectWrapper = level.CurrentWrapper;
                    ContentScanner.XObjectWrapper xObjectWrapper = objectWrapper as ContentScanner.XObjectWrapper;
                    if (xObjectWrapper == null)
                    {
                        continue;
                    }

                    // Is the external object an image?
                    it.stefanochizzolini.clown.documents.contents.xObjects.XObject xObject = xObjectWrapper.XObject;
                    if (!(xObject is it.stefanochizzolini.clown.documents.contents.xObjects.ImageXObject))
                    {
                        continue;
                    }

                    // Image key and indirect reference.
                    txtOutput.Text += Environment.NewLine +
                      "External Image '" + xObjectWrapper.Name + "' (" + xObject.BaseObject + ")";

                    SizeF? imageSize = xObject.Size; // Image native size.

                    PdfDataObject dataObject = xObject.BaseDataObject;
                    PdfDictionary header = ((PdfStream)dataObject).Header;
                    bool isImageStream =
                        header.ContainsKey(PdfName.Type) && header[PdfName.Type].Equals(PdfName.XObject)
                        && header.ContainsKey(PdfName.Subtype) && header[PdfName.Subtype].Equals(PdfName.Image);
                    if (isImageStream)
                    {
                        // A JPEG image is one whose stream filter is DCTDecode. (The old
                        // comparison against PdfName.Image could never match a filter.)
                        bool isJpeg = header.ContainsKey(PdfName.Filter)
                            && header[PdfName.Filter].Equals(PdfName.DCTDecode);
                        if (isJpeg)
                        {
                            // Get the image data (keeping it encoded)!
                            // NOTE(review): the stream is saved exactly as stored; other
                            // entries of the image dictionary (e.g. ColorSpace or Decode) are
                            // not applied, which may be why some exported files look
                            // colour-inverted in viewers — confirm against the affected PDFs.
                            IBuffer encodedBody = ((PdfStream)dataObject).GetBody(false);

                            // Export the image! The data is JPEG, so name the file accordingly.
                            ExportImage(
                              encodedBody,
                              txtOutputPath.Text.Trim() + System.IO.Path.DirectorySeparatorChar + "Image_" + (index++) + ".jpg"
                              );
                        }
                        else
                        {
                            // Make skipped images visible instead of dropping them silently.
                            txtOutput.Text += Environment.NewLine + "  (not exported: image data is not DCTDecode/JPEG)";
                        }
                    }

                    // Image position (location and size) on the page.
                    RectangleF? box = xObjectWrapper.Box;
                    if (imageSize.HasValue && box.HasValue)
                    {
                        // Page index and indirect reference.
                        txtOutput.Text += Environment.NewLine +
                          " on page " + (page.Index + 1) + " (" + page.BaseObject + ")";

                        txtOutput.Text += Environment.NewLine + "  Coordinates:";
                        txtOutput.Text += Environment.NewLine + "     x: " + Math.Round(box.Value.X);
                        txtOutput.Text += Environment.NewLine + "     y: " + Math.Round(box.Value.Y);
                        txtOutput.Text += Environment.NewLine + "     width: " + Math.Round(box.Value.Width) + " (native: " + Math.Round(imageSize.Value.Width) + ")";
                        txtOutput.Text += Environment.NewLine + "     height: " + Math.Round(box.Value.Height) + " (native: " + Math.Round(imageSize.Value.Height) + ")";
                    }
                }
            }
        }




        /// <summary>
        /// Writes the bytes of <paramref name="data"/> to a new file at
        /// <paramref name="outputPath"/> and logs the path to the console.
        /// </summary>
        /// <param name="data">Buffer whose content is written unchanged.</param>
        /// <param name="outputPath">Path of the file to create. The file is opened with
        /// <c>FileMode.CreateNew</c>, so an already existing file makes the call fail.</param>
        /// <exception cref="IOException">The file could not be created or written;
        /// the original error is available as the inner exception.</exception>
        private void ExportImage(IBuffer data, string outputPath)
        {
            FileStream outputStream;
            try
            { outputStream = new FileStream(outputPath, FileMode.CreateNew); }
            catch (Exception e)
            { throw new IOException(outputPath + " file couldn't be created.", e); }

            try
            {
                // The using blocks close writer and stream even when writing throws,
                // so a failed export no longer leaves the file handle open.
                using (outputStream)
                using (BinaryWriter writer = new BinaryWriter(outputStream))
                {
                    writer.Write(data.ToByteArray());
                }
            }
            catch (Exception e)
            { throw new IOException(outputPath + " file writing has failed.", e); }

            Console.WriteLine("Output: " + outputPath);
        }

        /// <summary>
        /// Keeps the Process button in sync with the inputs: enabled only when
        /// neither the source file box nor the output path box is blank.
        /// </summary>
        private void txtOutputPath_TextChanged(object sender, EventArgs e)
        {
            string sourceFile = txtFileName.Text.Trim();
            string outputPath = txtOutputPath.Text.Trim();

            btnProcess.Enabled = sourceFile.Length > 0 && outputPath.Length > 0;
        }

        /// <summary>
        /// Lets the user pick the output folder and copies the chosen path into
        /// the output-path text box.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnBrowseOutput_Click(object sender, EventArgs e)
        {
            // Ignore a cancelled dialog so an already entered output path is not replaced.
            if (folderBrowserDialog1.ShowDialog() == DialogResult.OK)
            {
                txtOutputPath.Text = folderBrowserDialog1.SelectedPath;
            }
        }

        /// <summary>
        /// Saves the content of the output text box to a file chosen by the user.
        /// Shows a message instead when there is nothing to save.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnsave_Click(object sender, EventArgs e)
        {
            if (txtOutput.Text == "")
            {
                MessageBox.Show("text box is empty");
                return;
            }

            // NOTE(review): the filter offers .rtf, but the text is always written
            // as plain text — confirm whether real RTF output is expected.
            saveFileDialog1.Filter = "Text Files (*.txt)|*.txt|RTF Files (*.rtf)|*.rtf";

            if (saveFileDialog1.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            // Dispose the writer so buffered text is flushed and the file is closed;
            // without this the saved file can end up empty or truncated.
            using (StreamWriter writer = new StreamWriter(saveFileDialog1.FileName))
            {
                writer.Write(txtOutput.Text);
            }
        }

        /// <summary>
        /// Closes the form.
        /// </summary>
        private void btncancel_Click(object sender, EventArgs e)
        {
            Close();
        }

        /// <summary>
        /// Empties the file-name, output and output-path text boxes.
        /// </summary>
        private void btnclear_Click(object sender, EventArgs e)
        {
            txtFileName.Text = string.Empty;
            txtOutput.Text = string.Empty;
            txtOutputPath.Text = string.Empty;
        }


    }


}

2 个答案:

答案 0 :(得分:1)

您需要反转图像缓冲区,即把收到的图像缓冲区按从最后一个字节到第一个字节的顺序复制到一个新的缓冲区中,这样应该可以解决图像上下颠倒的问题。然后,对每个像素的数据(假设为 24 位 RGB 值)交换 R 和 B 分量,使 RGB 变为 BGR,这样应该可以消除"负像"效果。最后保存这个新的缓冲区。

答案 1 :(得分:0)

将位图图像传递给以下函数...

/// <summary>
/// Copies the pixels of a bitmap into a tightly packed byte array
/// (3 bytes per pixel, no row padding), with the rows stored bottom-up:
/// the last row of the bitmap comes first in the result.
/// </summary>
/// <param name="bmp">Bitmap to read; it is locked as 24bpp RGB for the duration of the copy.</param>
/// <returns>Array of <c>Width * 3 * Height</c> bytes.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
private byte[] BmpToBytes_Unsafe (Bitmap bmp)
{
    if (bmp == null)
    {
        throw new ArgumentNullException("bmp");
    }

    // Lock the whole bitmap, using its own dimensions rather than external constants.
    BitmapData bData = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height),
        ImageLockMode.ReadOnly,
        PixelFormat.Format24bppRgb);
    try
    {
        int lineSize = bData.Width * 3;
        byte[] bmpBytes = new byte[lineSize * bData.Height];

        // Rows are Stride bytes apart in memory (Stride includes padding and may
        // differ from lineSize), so every row address must be derived from Stride.
        // 64-bit arithmetic keeps the pointer math valid in 64-bit processes.
        long firstRow = bData.Scan0.ToInt64();
        for (int i = 0; i < bData.Height; i++)
        {
            int sourceRow = bData.Height - 1 - i; // walk from the last row up to the first
            IntPtr scan = new IntPtr(firstRow + (long)bData.Stride * sourceRow);
            Marshal.Copy(scan, bmpBytes, i * lineSize, lineSize);
        }

        return bmpBytes;
    }
    finally
    {
        // Always unlock, even if the copy throws.
        bmp.UnlockBits(bData);
    }
}

祝编码愉快……:)