我需要解析一个大小约为 650 MB 的 *.chk 文件,通过查找十六进制形式的文件头和文件尾标记,从中提取 *.docx 文件(文件以二进制模式打开)。但这段代码只读到了前 4 个字符,无法读取整个文件。请帮忙!
#include <iostream>
#include <istream>
#include <ostream>
#include <fstream>
#include <string>
using namespace std;
int main()
{
cout << "Please, enter the file name: ";
char line[255];
cin >> line;
cout << "Please, enter where to put output with filename without . and file format!: ";
char outp[255];
int const req=702545920;
char * thebuff;
thebuff = new char [req];
cin >> outp;
long i=48,j(0);
ifstream mf;
mf.open(line, ios::binary);
char t[7]={};
char trailer[4]={};
int64_t start(0),endd(0);
mf.get(thebuff,req); //doesn't work as I need!_gets only first 4 chars instead of the whole file;
while (j<=req) // here it gets because of the reason above into an endless loop;
{
long temp=j;
if ((thebuff[j]==(char)0x50) && (thebuff[j+1]==(char)0x4B) && (thebuff[j+2]==(char)0x03) && (thebuff[j+3]==(char)0x04) && (thebuff[j+4]==(char)0x14) && (thebuff[j+5]==(char)0x00) && (thebuff[j+6]==(char)0x06) && (thebuff[j+7]==(char)0x00))
{
start = j;
bool tt(false);
while (temp<=req && !tt)
{
for (int k=0;i<=3;i++)
{
trailer[k] = thebuff[temp+k];
};
if ((trailer[0]==(char)0x50) && (trailer[1]==(char)0x4B) && (trailer[2]==(char)0x05) && (trailer[3]==(char)0x06))
{
endd = temp + (long)18;
tt = true;
};
};
if (tt)
{
start -= 8;
int bufsize = endd-start;
char * buffer;
buffer = new char [bufsize];
buffer = {};
int ff(1);
for (int f=start;f<=endd;f++)
{
buffer[ff]=thebuff[f];
ff++;
};
ofstream of;
if ((i<58) || (i>58 && i<92) || (i>92 && i<127)) {outp[sizeof(outp)+1] += (char)i; i++;}
else if (i>=126) {i=48; outp[sizeof(outp)+1] += (char)i + (char)i;}
else {i+=5;};
of.open(outp,ios::binary);
of.write(buffer,bufsize);
delete [] buffer;
j=endd;
};
};
j++;
};
delete [] thebuff;
}
答案 0 :(得分:1)
这是我用 C# 重写该程序后的代码,运行正常。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
namespace ConsoleApplication2
{
    /// <summary>
    /// Console tool that carves embedded .docx (ZIP) archives out of a binary file
    /// by locating a fixed header pattern and the following ZIP
    /// end-of-central-directory record.
    /// </summary>
    class Program
    {
        // Header pattern searched for: ZIP local file header signature "PK\x03\x04"
        // followed by the bytes 14 00 06 00.
        private static readonly byte[] DocxHeader = { 0x50, 0x4B, 0x03, 0x04, 0x14, 0x00, 0x06, 0x00 };

        // ZIP end-of-central-directory signature "PK\x05\x06".
        private static readonly byte[] EndOfCentralDirectorySignature = { 0x50, 0x4B, 0x05, 0x06 };

        // Size of the end-of-central-directory record including its signature,
        // excluding the optional trailing comment.
        private const int EndOfCentralDirectoryFixedSize = 22;

        /// <summary>
        /// Prompts for the input file and an output name prefix, extracts every
        /// embedded archive to "&lt;prefix&gt;&lt;n&gt;.docx" and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.Write("Please, enter the file name: ");
            string line = Console.ReadLine();
            Console.Write("Please, enter where to put output with filename without . and file format!: ");
            string outp = Console.ReadLine();

            // Fall back to the historical "file" prefix when nothing was entered.
            string prefix = string.IsNullOrWhiteSpace(outp) ? "file" : outp;

            try
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    Console.WriteLine("No input file name was given.");
                }
                else
                {
                    // Raw bytes, not StreamReader: a text reader decodes the data and
                    // corrupts binary content. ReadAllBytes handles files below 2 GB.
                    byte[] data = File.ReadAllBytes(line);
                    Console.WriteLine("Programm started successfully!..");
                    int extracted = ExtractDocuments(data, prefix);
                    Console.WriteLine(extracted + " file(s) extracted.");
                }
            }
            catch (IOException ex)
            {
                Console.WriteLine("I/O error: " + ex.Message);
            }
            catch (UnauthorizedAccessException ex)
            {
                Console.WriteLine("Access denied: " + ex.Message);
            }
            catch (ArgumentException ex)
            {
                Console.WriteLine("Invalid path: " + ex.Message);
            }

            Console.WriteLine("Terminating...");
            Console.WriteLine("Press any key to exit.");
            Console.ReadKey();
        }

        /// <summary>
        /// Scans <paramref name="data"/> for header/trailer pairs and writes each
        /// span found to its own file.
        /// </summary>
        /// <param name="data">Complete contents of the input file.</param>
        /// <param name="prefix">Output file name prefix; a counter and ".docx" are appended.</param>
        /// <returns>Number of files written.</returns>
        private static int ExtractDocuments(byte[] data, string prefix)
        {
            int fileCount = 0;
            int searchFrom = 0;

            while (true)
            {
                int start = IndexOf(data, DocxHeader, searchFrom);
                if (start < 0)
                {
                    break;
                }

                Console.WriteLine("Acquiring file...");
                int eocd = IndexOf(data, EndOfCentralDirectorySignature, start + DocxHeader.Length);
                if (eocd < 0)
                {
                    // No trailer after this header, so none after any later header.
                    break;
                }

                // long arithmetic so an offset near the end of a very large input cannot overflow.
                long end = (long)eocd + EndOfCentralDirectoryFixedSize;
                if (end > data.Length)
                {
                    // Record truncated by the end of the input: keep what exists.
                    end = data.Length;
                }
                else
                {
                    // The last two bytes of the fixed record hold the little-endian comment length.
                    int commentLength = data[end - 2] | (data[end - 1] << 8);
                    if (end + commentLength <= data.Length)
                    {
                        end += commentLength;
                    }
                }

                fileCount++;
                string name = prefix + fileCount + ".docx";
                using (FileStream output = new FileStream(name, FileMode.Create, FileAccess.Write))
                {
                    output.Write(data, start, (int)(end - start));
                }
                Console.WriteLine("File " + " '" + name + "' created...");

                // Continue scanning right after the archive just extracted.
                searchFrom = (int)end;
            }

            return fileCount;
        }

        /// <summary>
        /// Returns the index of the first occurrence of <paramref name="pattern"/> in
        /// <paramref name="data"/> at or after <paramref name="start"/>, or -1 if absent.
        /// </summary>
        private static int IndexOf(byte[] data, byte[] pattern, int start)
        {
            int last = data.Length - pattern.Length;
            for (int i = start; i <= last; i++)
            {
                // Jump straight to the next candidate first byte.
                i = Array.IndexOf(data, pattern[0], i, last - i + 1);
                if (i < 0)
                {
                    return -1;
                }

                int k = 1;
                while (k < pattern.Length && data[i + k] == pattern[k])
                {
                    k++;
                }
                if (k == pattern.Length)
                {
                    return i;
                }
            }
            return -1;
        }
    }
}