Question

我参与编写的C＃软件有一个组件，涉及从扫描文档中读取条形码。 PDF文件本身使用PDFSharp打开。

不幸的是，在对PDF中使用Flate编码（FlateDecode）的图像进行解码时，我们遇到了问题。基本上，我们得到的只是一团模糊的图像，这意味着没有条形码可供检查，文件也就无法识别。

我们的代码（我们从另一个Stack Overflow案例中无耻地“借用”）如下：

/// <summary>
/// Flate-decodes the stream of a PDF image dictionary, copies the decoded rows into a
/// <see cref="Bitmap"/> and saves it as "&lt;sourceFileName&gt;_Image&lt;count&gt;.png".
/// </summary>
/// <param name="image">PDF image dictionary; its Width, Height and BitsPerComponent entries and its stream are read.</param>
/// <param name="sourceFileName">Prefix used to build the output file name.</param>
/// <param name="count">Image counter embedded in the file name; incremented by one before the file is written.</param>
/// <returns>A <see cref="FileInfo"/> describing the PNG file that was written.</returns>
/// <exception cref="NotSupportedException">BitsPerComponent is not 1, 8 or 24.</exception>
private FileInfo ExportAsPngImage(PdfDictionary image, string sourceFileName, ref int count)
    {
        //This code basically comes from http://forum.pdfsharp.net/viewtopic.php?f=2&t=2338#p6755 
        //and http://stackoverflow.com/questions/10024908/how-to-extract-flatedecoded-images-from-pdf-with-pdfsharp
        string tempFile = string.Format("{0}_Image{1}.png", sourceFileName, count);

        int width = image.Elements.GetInteger(PdfImage.Keys.Width);
        int height = image.Elements.GetInteger(PdfImage.Keys.Height);
        int bitsPerComponent = image.Elements.GetInteger(PdfImage.Keys.BitsPerComponent);
        PixelFormat pixelFormat;

        switch (bitsPerComponent)
        {
            case 1:
                pixelFormat = System.Drawing.Imaging.PixelFormat.Format1bppIndexed;
                break;
            case 8:
                pixelFormat = System.Drawing.Imaging.PixelFormat.Format8bppIndexed;
                break;
            case 24:
                pixelFormat = System.Drawing.Imaging.PixelFormat.Format24bppRgb;
                break;
            default:
                throw new NotSupportedException("Unknown pixel format " + bitsPerComponent);
        }

        // NOTE(review): this assumes FlateDecode is the stream's only filter and that the decoded
        // bytes are raw, byte-aligned sample rows. A stream with a further filter (for example a
        // JPEG wrapped in Flate) would not decode to pixel rows - confirm against the /Filter entry.
        var fd = new FlateDecode();
        byte[] decodedBytes = fd.Decode(image.Stream.Value);

        // Number of bytes one decoded row occupies (rounded up to a whole byte).
        int rowBytes = (width * bitsPerComponent + 7) / 8;

        // The bitmap keeps the real image width. DWORD alignment is handled by copying row by row
        // to Scan0 + row * Stride, so no separately padded source buffer is needed.
        using (var bitmap = new Bitmap(width, height, pixelFormat))
        {
            BitmapData bitmapData = bitmap.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, pixelFormat);
            try
            {
                for (int row = 0; row < height; row++)
                {
                    int sourceOffset = row * rowBytes;

                    // Best effort on short data: copy what is available and leave the rest untouched.
                    int available = Math.Min(rowBytes, decodedBytes.Length - sourceOffset);
                    if (available <= 0)
                    {
                        break;
                    }

                    // IntPtr.Add keeps the pointer arithmetic valid in 64-bit processes.
                    Marshal.Copy(decodedBytes, sourceOffset, IntPtr.Add(bitmapData.Scan0, row * bitmapData.Stride), available);
                }
            }
            finally
            {
                bitmap.UnlockBits(bitmapData);
            }

            // The file name above was built from the old counter value; advance it for the caller.
            count++;

            // tempFile is already the final path - it must not be used as a format string again.
            using (var fs = new FileStream(tempFile, FileMode.Create, FileAccess.Write))
            {
                bitmap.Save(fs, ImageFormat.Png);
            }
        }

        return new FileInfo(tempFile);
    }

不幸的是，我们得到的就是http://i.stack.imgur.com/FwatQ.png

我们非常感谢任何关于我们哪里出错的想法，或对我们可能会尝试的事情的建议。

干杯

Answer 1

感谢你们的建议。我们的一位开发人员设法解决了这个问题——它（正如Jongware所建议的那样）是一个JPEG，但它实际上还被zip压缩过！解压缩之后，就可以像平常一样处理和识别了。

通过Flate解码从PDF中提取的PNG无法识别 - C＃

1 个答案: