使用zlib解压(inflate)PDF流时,有时得到空白结果

时间:2015-06-05 09:35:54

标签: c++ pdf zlib inflate compression

我是一名初学程序员,正在尝试从PDF文件中提取文本流。我采用并稍微修改了一些使用zlib的开源代码,通常它运行得很好。但是,我最近用一些不同的PDF进行测试时,发现有些流在解压(inflate)之后返回的是空白。有人可以告诉我为什么吗?

我找到了下面这个问题,它讨论的似乎是同样的情况,但并没有给出明确的答案:

zLib inflate has empty result in some cases

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "zlib.h"

    int main()
    {
        //Discard existing output:
        //Open the PDF source file:
        std::ifstream filei("C:\\Users\\dpbowe\\Desktop\\PIDSearch\\P&ID.PDF", std::ios::in|std::ios::binary|std::ios::ate);
        if (!filei) std::cout << "Error Opening Input File" << std::endl;

        //decoded output
        std::ofstream fileo;
        fileo.open("C:\\Users\\dpbowe\\Desktop\\Decoded.txt", std::ios::binary | std::ofstream::out);
        if (!fileother) std::cout << "Error opening output file" << std::endl;

        if (filei && fileo)
        {
            //Get the file length:
            long filelen = filei.tellg();   //fseek==0 if ok
            filei.seekg(0, std::ios::beg);

            //Read the entire file into memory (!):
            char* buffer = new char [filelen];
            if (buffer == NULL) {fputs("Memory error", stderr); exit(EXIT_FAILURE);}
            filei.read(buffer,filelen);
            if (buffer == '\0') {fputs("Reading error", stderr); exit(EXIT_FAILURE);}

            bool morestreams = true;
            //Now search the buffer repeated for streams of data
            while (morestreams)
            {
                //Search for stream, endstream. Should check the filter of the object to make sure it if FlateDecode, but skip that for now!
                size_t streamstart = FindStringInBuffer (buffer, "stream", filelen);  //This is my own search function
                size_t streamend   = FindStringInBuffer (buffer, "endstream", filelen); //This is my own search function

                if (streamstart>0 && streamend>streamstart)
                {
                    //Skip to beginning and end of the data stream:
                    streamstart += 6;

                    if (buffer[streamstart]==0x0d && buffer[streamstart+1]==0x0a) streamstart+=2;
                    else if (buffer[streamstart]==0x0a) streamstart++;

                    if (buffer[streamend-2]==0x0d && buffer[streamend-1]==0x0a) streamend-=2;
                    else if (buffer[streamend-1]==0x0a) streamend--;

                    //Assume output will fit into 10 times input buffer:
                    size_t outsize = (streamend - streamstart)*10;
                    char* output = new char [outsize]; ZeroMemory(output, outsize);

                    //Now use zlib to inflate:
                    z_stream zstrm; ZeroMemory(&zstrm, sizeof(zstrm));

                    zstrm.avail_in = streamend - streamstart + 1;
                    zstrm.avail_out = outsize;
                    zstrm.next_in = (Bytef*)(buffer + streamstart);
                    zstrm.next_out = (Bytef*)output;

                    int rsti = inflateInit(&zstrm);
                    if (rsti == Z_OK)
                    {
                        int rst2 = inflate (&zstrm, Z_FINISH);
                        if (rst2 >= 0)
                        {
                            size_t totout = zstrm.total_out;

                            //Write inflated output to file "Decoded.txt"
                            fileother<<output;
                            fileother<<"\r\nStream End\r\n\r\n";
                        }
                        else std::cout<<"output uncompressed stream is blank"<<std::endl;
                    }
                    delete[] output; output=0;
                    buffer+= streamend + 7;
                    filelen = filelen - (streamend+7);
                }
                else
                {
                    morestreams = false;
                    std::cout<<"End of File"<<std::endl;
                }
            }
            filei.close();
        }
        else
        {
            std::cout << "File Could Not Be Accessed\n";
        }
        if (fileo) fileo.close();
    }

0 个答案:

没有答案