解析 *.chk 文件并从中提取 *.docx 文件

时间:2014-08-28 06:55:32

标签: c++ parsing

我需要解析一个大小约为 650 MB 的 *.chk 文件,通过查找十六进制形式的文件头和文件尾标记,从中提取 *.docx 文件(文件以二进制流方式打开)。下面的代码只读到了前 4 个字符,无法读取整个文件。请帮忙!

#include <iostream>
#include <istream>
#include <ostream>
#include <fstream>
#include <string>

using namespace std;

// Reads a file into memory, looks for an 8-byte .docx (ZIP local file header)
// signature followed later by the 4-byte ZIP end-of-central-directory signature,
// and tries to write the bytes in between to an output file.
// NOTE(review): this is the non-working version under discussion. The code is
// left exactly as posted; the comments mark the defects visible in it.
int main()
{
    cout << "Please, enter the file name: ";
    char line[255];
    cin >> line;
    cout << "Please, enter where to put output with filename without . and file format!: ";
    char outp[255];
    // 702545920 bytes = 670 MiB, allocated in one piece regardless of the real file size.
    int const req=702545920;
    char * thebuff;
    thebuff = new char [req];
    cin >> outp;
    // i starts at 48 (ASCII '0') and is used as the output-name suffix character below.
    long i=48,j(0);
    ifstream mf;
    // NOTE(review): the result of open() is never checked.
    mf.open(line, ios::binary);
    // NOTE(review): t is never used.
    char t[7]={};
    char trailer[4]={};
    int64_t start(0),endd(0);
    // NOTE(review): istream::get(char*, streamsize) stops at the first '\n' (0x0A)
    // byte, which is why only a few bytes arrive; it also appends a '\0'.
    // Unformatted read() has no delimiter.
    mf.get(thebuff,req); //doesn't work as I need!_gets only first 4 chars instead of the whole file;
    // NOTE(review): the body reads thebuff[j] .. thebuff[j+7], so with j allowed up
    // to req it indexes past the end of the req-element buffer.
    while (j<=req) // here it gets because of the reason above into an endless loop;
    {
        long temp=j;
        // Header signature: 50 4B 03 04 14 00 06 00.
        if ((thebuff[j]==(char)0x50) && (thebuff[j+1]==(char)0x4B) && (thebuff[j+2]==(char)0x03) && (thebuff[j+3]==(char)0x04) && (thebuff[j+4]==(char)0x14) && (thebuff[j+5]==(char)0x00) && (thebuff[j+6]==(char)0x06) && (thebuff[j+7]==(char)0x00))
        {
            start = j;
            bool tt(false);
            // NOTE(review): temp is never changed inside this loop, so once entered it
            // can only end if tt becomes true on the first pass.
            while (temp<=req && !tt)
            {
                // NOTE(review): the condition and increment use i (48 or more), not k, so
                // this body never runs and trailer keeps its zero initial value.
                for (int k=0;i<=3;i++)
                {
                    trailer[k] = thebuff[temp+k];
                };
                // Trailer signature: 50 4B 05 06.
                if ((trailer[0]==(char)0x50) && (trailer[1]==(char)0x4B) && (trailer[2]==(char)0x05) && (trailer[3]==(char)0x06))
                {
                    endd = temp + (long)18;
                    tt = true;
                };
            };
            if (tt)
            {
                // NOTE(review): start was set to the header position j; this moves it
                // 8 bytes before the header (and below 0 when j < 8) - looks unintended.
                start -= 8;
                int bufsize = endd-start;
                char * buffer;
                buffer = new char [bufsize];
                // NOTE(review): this assigns a null pointer to buffer, dropping the
                // allocation made on the previous line.
                buffer = {};
                // NOTE(review): ff starts at 1 and the loop runs while f<=endd, so it
                // writes bufsize+1 values at indices 1..bufsize+1 of a bufsize-element array.
                int ff(1);
                for (int f=start;f<=endd;f++)
                {
                    buffer[ff]=thebuff[f];
                    ff++;
                };
                ofstream of;
                // NOTE(review): sizeof(outp) is 255, so outp[sizeof(outp)+1] is outside the
                // array. Presumably meant to append a distinguishing character to the
                // output name - confirm intent.
                if ((i<58) || (i>58 && i<92) || (i>92 && i<127)) {outp[sizeof(outp)+1] += (char)i; i++;}
                else if (i>=126) {i=48; outp[sizeof(outp)+1] += (char)i + (char)i;}
                else {i+=5;};
                of.open(outp,ios::binary);
                of.write(buffer,bufsize);
                delete [] buffer;
                // Resume the outer scan at the end of the extracted range.
                j=endd;
            };
        };
        j++;
    };
    delete [] thebuff;
}

1 个答案:

答案 0 :(得分:1)

这是我用 C# 重写该程序后得到的代码,它可以正常工作。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;

namespace ConsoleApplication2
{
    /// <summary>
    /// Console tool that carves embedded .docx (ZIP) archives out of a binary file
    /// (for example a recovered *.chk fragment) and writes each one to its own file.
    /// </summary>
    /// <remarks>
    /// The input is processed as raw bytes. A text reader must not be used here: it
    /// decodes the bytes as characters and buffers ahead of the underlying stream,
    /// so both the data and the stream position would be wrong.
    /// </remarks>
    class Program
    {
        // First 8 bytes being searched for: the ZIP local-file-header signature
        // "PK\x03\x04" followed by 14 00 06 00.
        private static readonly byte[] DocxHeader = { 0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00 };

        // ZIP end-of-central-directory signature "PK\x05\x06"; marks the archive trailer.
        private static readonly byte[] EndOfCentralDirectory = { 0x50, 0x4B, 0x05, 0x06 };

        // The end-of-central-directory record is 22 bytes (4 signature + 18 fixed bytes),
        // optionally followed by a comment whose 16-bit little-endian length is stored
        // at offset 20 of the record.
        private const int EndRecordFixedSize = 22;
        private const int CommentLengthOffset = 20;

        // Size of the window used while scanning; large files are never loaded whole.
        private const int ScanBufferSize = 1 << 20;

        // Chunk size used when copying a located document to its output file.
        private const int CopyBufferSize = 81920;

        /// <summary>
        /// Asks for the input file and an output name prefix, then writes every
        /// document found to "&lt;prefix&gt;&lt;n&gt;.docx" (n = 1, 2, ...).
        /// An empty prefix falls back to "file".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.Write("Please, enter the file name: ");
            string line = Console.ReadLine();
            Console.Write("Please, enter where to put output with filename without . and file format!: ");
            string outp = Console.ReadLine();

            if (string.IsNullOrWhiteSpace(line))
            {
                Console.WriteLine("No input file name was entered.");
            }
            else
            {
                string prefix = string.IsNullOrWhiteSpace(outp) ? "file" : outp.Trim();
                try
                {
                    using (FileStream mf = new FileStream(line.Trim(), FileMode.Open, FileAccess.Read))
                    {
                        Console.WriteLine("Programm started successfully!..");
                        int fileCount = 0;
                        foreach (KeyValuePair<long, long> range in FindDocxRanges(mf))
                        {
                            Console.WriteLine("Acquiring file...");
                            fileCount++;
                            string path = prefix + fileCount + ".docx";
                            CopyRange(mf, range.Key, range.Value - range.Key, path);
                            Console.WriteLine("File '" + path + "' created...");
                        }
                    }
                }
                catch (IOException ex)
                {
                    Console.WriteLine("I/O error: " + ex.Message);
                }
                catch (UnauthorizedAccessException ex)
                {
                    Console.WriteLine("Access denied: " + ex.Message);
                }
            }

            Console.WriteLine("Terminating...");
            Console.WriteLine("Press any key to exit.");
            // ReadKey throws InvalidOperationException when stdin is redirected.
            if (!Console.IsInputRedirected)
            {
                Console.ReadKey();
            }
        }

        /// <summary>
        /// Locates every byte range that starts with the .docx header signature and
        /// ends with the first ZIP end-of-central-directory record that follows it.
        /// </summary>
        /// <param name="source">Seekable, readable stream to scan.</param>
        /// <returns>
        /// One pair per document: Key is the offset of the first byte, Value is the
        /// offset one past the last byte. Ranges are in file order and do not overlap.
        /// A header with no trailer after it produces no entry.
        /// </returns>
        internal static List<KeyValuePair<long, long>> FindDocxRanges(Stream source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            List<KeyValuePair<long, long>> ranges = new List<KeyValuePair<long, long>>();
            byte[] scanBuffer = new byte[ScanBufferSize];
            long searchFrom = 0;

            while (true)
            {
                long start = IndexOf(source, DocxHeader, searchFrom, scanBuffer);
                if (start < 0)
                {
                    break;
                }

                long trailer = IndexOf(source, EndOfCentralDirectory, start + DocxHeader.Length, scanBuffer);
                if (trailer < 0)
                {
                    // No trailer anywhere after this header, so no later header can be completed either.
                    break;
                }

                long end = EndOfArchive(source, trailer);
                ranges.Add(new KeyValuePair<long, long>(start, end));
                searchFrom = end;
            }

            return ranges;
        }

        /// <summary>
        /// Returns the offset of the first occurrence of <paramref name="pattern"/> at or
        /// after <paramref name="startOffset"/>, or -1 when there is none.
        /// </summary>
        private static long IndexOf(Stream source, byte[] pattern, long startOffset, byte[] buffer)
        {
            if (startOffset >= source.Length)
            {
                return -1;
            }

            source.Seek(startOffset, SeekOrigin.Begin);
            long bufferOffset = startOffset; // file offset of buffer[0]
            int carried = 0;                 // bytes kept from the previous chunk

            while (true)
            {
                int read = source.Read(buffer, carried, buffer.Length - carried);
                if (read == 0)
                {
                    return -1;
                }

                int available = carried + read;
                int lastStart = available - pattern.Length;
                for (int i = 0; i <= lastStart; i++)
                {
                    if (MatchesAt(buffer, i, pattern))
                    {
                        return bufferOffset + i;
                    }
                }

                // Keep the last pattern.Length - 1 bytes so a match that straddles
                // two chunks is still seen on the next pass.
                int keep = Math.Min(pattern.Length - 1, available);
                Array.Copy(buffer, available - keep, buffer, 0, keep);
                bufferOffset += available - keep;
                carried = keep;
            }
        }

        /// <summary>Tests whether <paramref name="pattern"/> occurs in <paramref name="buffer"/> at <paramref name="index"/>.</summary>
        private static bool MatchesAt(byte[] buffer, int index, byte[] pattern)
        {
            for (int k = 0; k < pattern.Length; k++)
            {
                if (buffer[index + k] != pattern[k])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Computes the offset one past the end of the archive whose
        /// end-of-central-directory record starts at <paramref name="recordOffset"/>,
        /// including the optional trailing comment and clamped to the stream length.
        /// </summary>
        private static long EndOfArchive(Stream source, long recordOffset)
        {
            long fixedEnd = recordOffset + EndRecordFixedSize;
            if (fixedEnd > source.Length)
            {
                // Record is cut off by the end of the file; take what is there.
                return source.Length;
            }

            source.Seek(recordOffset + CommentLengthOffset, SeekOrigin.Begin);
            int low = source.ReadByte();
            int high = source.ReadByte();
            int commentLength = (low < 0 || high < 0) ? 0 : (low | (high << 8));
            return Math.Min(fixedEnd + commentLength, source.Length);
        }

        /// <summary>
        /// Copies <paramref name="length"/> bytes starting at <paramref name="offset"/>
        /// from <paramref name="source"/> into a new file at <paramref name="path"/>,
        /// overwriting any existing file.
        /// </summary>
        private static void CopyRange(Stream source, long offset, long length, string path)
        {
            byte[] chunk = new byte[CopyBufferSize];
            source.Seek(offset, SeekOrigin.Begin);
            using (FileStream output = new FileStream(path, FileMode.Create, FileAccess.Write))
            {
                long remaining = length;
                while (remaining > 0)
                {
                    int read = source.Read(chunk, 0, (int)Math.Min(chunk.Length, remaining));
                    if (read == 0)
                    {
                        break;
                    }

                    output.Write(chunk, 0, read);
                    remaining -= read;
                }
            }
        }
    }
}