20 00 20 00 00 10 00 00 <data> 20 00 20 00 00 10 00 00 <data> ...
其中(20 00 20 00 00 10 00 00)是每个数据部分(图像)之间的分隔。
3C 63 9E FF 38 5F 9E FF
31 59 91 FF 20 00 20 00
00 10 00 00 55 73 A2 FF
38 5D 9C FF 3A 5E 95 FF
我想要做的基本上是将它分开。我想取20 00 20 00 00 10 00 00分隔的部分,并将每个部分放在一个txt文件中,如1.txt,2.txt ... n.txt
我尝试逐行读取,但这样会有问题:20 00 .. 这段分隔序列有时会跨两行出现(如上面的例子),因此逐行比较无法找到它的每一处出现。
// Reads the input one line at a time and compares each whole line against the
// separator text. A separator that is split across two lines, or that shares a
// line with other bytes, never compares equal and is therefore missed.
while (getline(myfile,line,'\n')){
if (line == "20 00 20 00 00 10 00 00")
答案 0 :(得分:2)
我的建议是按二进制方式读取文件。如果文件足够小,可以一次性把它全部读入内存;否则建议使用操作系统提供的内存映射(map the file into memory)功能,把整个文件(或者至少是它的一个“窗口”)映射到内存中。
答案 1 :(得分:1)
#include <cstdio>
#include <cstring>
#include <fstream>
#include <string>
using std::ifstream;
using std::ofstream;
using std::string;
/**
 * Advances a numbered output file name of the form "<N>.dat" to "<N+1>.dat",
 * rewriting the buffer in place.
 *
 * @param filename  Writable, NUL-terminated buffer holding the current name.
 * @param capacity  Size of that buffer in bytes. Defaults to 16, the size of
 *                  the buffer main() passes in. The new name is truncated
 *                  rather than written past the end of the buffer.
 *
 * The buffer is left untouched when it is null, has no room, or does not
 * start with a decimal number.
 */
void incrementFilename(char* filename, std::size_t capacity = 16) {
    if (!filename || capacity == 0) {
        return;
    }
    int iFile = 0;
    // sscanf returns the number of converted fields; anything other than 1
    // means no number was parsed and iFile must not be used.
    if (std::sscanf(filename, "%d.dat", &iFile) != 1) {
        return;
    }
    std::snprintf(filename, capacity, "%d.dat", iFile + 1);
}
int main() {
char outputFilename[16] = "1.dat";
ifstream input("myfile.dat", ifstream::binary);
ofstream output(outputFilename, ofstream::binary);
while (!input.eof() || !input.is_open()) {
char readbyte;
input.read(&readbyte, 1);
if (readbyte == 0x20) {
char remaining[7];
char testcase[7] = { 0x00, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00 };
input.read(remaining, 7);
if (strncmp(remaining, testcase, 7) == 0) {
output.open(outputFilename, ofstream::binary);
} else {
output.write(&readbyte, 1);
output.write(remaining, 7);
} else {
output.write(&readbyte, 1);
return 0;
答案 2 :(得分:0)
鉴于您要查找的数据序列可能被换行拆开,您需要以最小的单位(即两个字符组成的十六进制对)读取数据,并忽略空白字符(空格或换行分隔符)。
好的,我写了下面的代码。希望其中的用法说明(Usage)已经讲清楚了它的功能。我其实不太想用流(streams)——我觉得它们效率很低——但既然你一开始就用了流……
// SubFile.cpp
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
using namespace std;
const unsigned MaxBytesPerLine = 16;    // Hex pairs written per line in text mode
const unsigned char magic[] = { '\x20','\x00','\x20','\x00','\x00','\x10','\x00','\x00' };  // Separator sequence

/**
 * One output sub-file.
 *
 * In binary mode every value passed to Write() is stored as a single raw byte.
 * In text mode values are written as two-digit uppercase hex pairs separated
 * by single spaces, MaxBytesPerLine pairs per line, and the destructor
 * terminates a partially filled last line with a newline.
 */
class OutFile : private std::ofstream {
public: // Methods
    using std::ofstream::is_open;   // Let others see whether I'm open

    /**
     * Creates (truncating) the file.
     * @param fileName  Path of the file to create.
     * @param bin       true for raw bytes, false for hex text.
     */
    OutFile(const std::string &fileName, bool bin);

    /** Finishes the last text line, if any; the base class closes the file. */
    ~OutFile();

    /**
     * Appends one byte value.
     * @param b  Value to write; only the low 8 bits are stored in binary mode.
     * @return   true while the underlying stream is still good.
     */
    bool Write(unsigned b);

private: // Variables
    unsigned num;   // Number of bytes already written on the current text line
    bool     bin;   // Whether to output binary
}; // OutFile

OutFile::OutFile(const std::string &filename, bool bin) :
    std::ofstream(filename.c_str(),
                  bin ? (std::ios_base::out | std::ios_base::binary)
                      : std::ios_base::out),
    num(0),
    bin(bin) {
    if (!bin) {
        // Sticky formatting for every later Write(): uppercase hex, zero-filled.
        std::ostream &out = *this;
        out << std::hex << std::uppercase << std::setfill('0');
    } // if
} // OutFile::OutFile(name, bin)

bool OutFile::Write(unsigned b) {
    if (bin) {
        const char c = static_cast<char>(b);    // Store the low byte only
        return write(&c, 1).good();
    } // if

    std::ostream &out = *this;
    if (num > 0) {
        out << ' ';
    } // if
    out << std::setw(2) << b;                   // setw resets after each insertion
    if (++num == MaxBytesPerLine) {
        out << '\n';
        num = 0;
    } // if
    return good();
} // OutFile::Write(b)

OutFile::~OutFile() {
    // Only a text file with a partially filled, still writable last line
    // needs a closing newline.
    if (!bin && num != 0 && good()) {
        std::ostream &out = *this;
        out << '\n';
    } // if
} // OutFile::~OutFile
/**
 * Prints the command-line help to standard output, including the separator
 * sequence rendered as two-digit hex values.
 *
 * @param argv0  Program name as invoked, shown in the usage line.
 *
 * Leaves std::cout in hexadecimal mode with '0' as its fill character.
 */
void Usage(char *argv0) {
    std::ostream &out = std::cout;

    out << "Usage:" << std::endl
        << " " << argv0 << " <filename.txt> [bin]" << std::endl
        << " Read <filename.txt> in hex char pairs, ignoring whitespace." << std::endl
        << " Write pairs out to multiple sub-files, called \"1.txt\", \"2.txt\" etc." << std::endl
        << " New files are started when the following sequence is detected: " << std::endl
        << " ";

    for (const unsigned char byte : magic) {
        out << ' ' << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(byte);
    }

    out << std::endl
        << " If bin is specified: write out in binary, and files have a '.bin' extension" << std::endl;
} // Usage(argv0)
int main(int argc, char *argv[]) {
if (argc < 2) {
return 1;
} // if
ifstream inFile(argv[1]);
if (!inFile.is_open()) {
cerr << "Could not open '" << argv[1] << "'!" << endl;
return 2;
} // if
bool bin = (argc >= 3) &&
(argv[2][0] == 'b'); // Close enough!
unsigned fileNum = 0; // Current output file number
inFile >> setbase(16); // All inFile accesses will be like this
while (inFile.good()) { // Let's get started!
string outFileName = to_string(++fileNum) + (bin ? ".bin" : ".txt");
OutFile outFile(outFileName, bin);
if (!outFile.is_open()) {
cerr << "Could not create " << outFileName << "!" << endl;
return (int)(fileNum + 2);
} // if
unsigned b; // byte read in
unsigned pos = 0; // Position in 'magic'
while (inFile >> b) {
if (b > 0xFF) {
cerr << argv[1] << " contains illegal value: "
<< hex << uppercase << showbase << b << endl;
return -1;
} // if
if (b == magic[pos]) { // Found some magic!
if (++pos == sizeof(magic)) { // ALL the magic?
break; // Leave!
} // if
continue; // Otherwise go back for more
} // if
if (pos > 0) { // Uh oh. No more magic!
for (unsigned i = 0; i < pos; ++i) {
outFile.Write(magic[i]); // So write out what we got
} // for
pos = 0;
} // if
} // while
} // for
if (inFile.eof()) {
return 0; // Success!
} // if
string s;
getline(inFile, s);
cerr << argv[1] << " contains invalid data: " << s << endl;
return -2;
} // main(argc,argv)
“你为什么不这样做?” “你为什么那样做?” 让闸门打开!
答案 3 :(得分:0)
这是我的解决方案。它的效率不算高,等我考完期末考试后可能会重写。我假设输入是一些以空白分隔的数据字节。问题很简单 -> 这只是一个模式匹配问题。我本可以用更复杂的技术来处理,但我们的模式长度固定且非常短,即使是暴力匹配也只需要线性时间。
代码不言自明。我逐字节读取文件并把数据加入缓冲区(这样效率不高;也可以只在文件中保留一个带索引边界的数据窗口 -> 这样在创建新文件时 I/O 操作会更高效)。一旦找到终止序列,就把它从缓冲区末尾弹出,并把缓冲区内容保存到文件中(我假设我们不需要空文件)。
/**
 * Writes `bytes` to `filename` as two-digit uppercase hex values separated by
 * single spaces, `sequenceLength` values per line. Every line, including a
 * partially filled last one, ends with a newline.
 *
 * @param bytes           Values to write; nothing is created when empty.
 * @param filename        Path of the file to create (truncated if it exists).
 * @param sequenceLength  Values per line; a non-positive value puts everything
 *                        on one line.
 */
void save(const std::vector<short>& bytes, std::string filename, int sequenceLength)
{
    if (bytes.empty()) return; // Don't want empty files

    std::ofstream outputFile(filename.c_str());
    if (!outputFile) return;

    outputFile << std::uppercase << std::hex;
    outputFile.fill('0');

    int column = 0;
    for (const short byte : bytes)
    {
        if (column > 0) outputFile << ' ';
        outputFile.width(2);        // width resets after every insertion
        outputFile << byte;
        ++column;
        if (sequenceLength > 0 && column == sequenceLength)
        {
            outputFile << '\n';
            column = 0;
        }
    }
    if (column > 0) outputFile << '\n';
}
/**
 * Builds the name of the output file with the given index.
 *
 * @param number  Index of the output file.
 * @return        "<number>.txt", e.g. "3.txt" for 3.
 */
std::string getFilename(int number)
{
    std::ostringstream name;
    name << number << ".txt";
    return name.str();
}
/**
 * Parses a hexadecimal number from a C string.
 *
 * @param buffer  NUL-terminated text such as "FF" or "0a"; may be null.
 * @return        The parsed value, or 0 when `buffer` is null, does not start
 *                with a hex number, or the number does not fit in a short.
 */
short getIntFromHex(const char* buffer)
{
    if (buffer == nullptr) return 0;

    short result = 0;
    std::istringstream ss(buffer);
    if (!(ss >> std::hex >> result)) return 0;
    return result;
}
/**
 * Tells whether `bytes` currently ends with the terminating sequence.
 *
 * @param bytes                Values collected so far.
 * @param terminatingSequence  Array of at least `sequenceLength` values.
 * @param sequenceLength       Number of values in the terminating sequence.
 * @return  true when the last `sequenceLength` elements of `bytes` equal the
 *          sequence; false otherwise, including when `bytes` is shorter than
 *          the sequence or `sequenceLength` is not positive.
 */
bool findTerminatingSequence(const std::vector<short>& bytes, short terminatingSequence[], int sequenceLength)
{
    if (sequenceLength <= 0) return false;

    const std::size_t length = static_cast<std::size_t>(sequenceLength);
    if (bytes.size() < length) return false;    // would otherwise index before begin()

    const std::size_t startIndex = bytes.size() - length;
    for (std::size_t i = 0; i < length; i++)
    {
        if (terminatingSequence[i] != bytes[startIndex + i]) return false;
    }
    return true;
}
/**
 * Removes the last `sequenceLength` elements from `bytes`.
 *
 * @param bytes           Vector to shorten in place.
 * @param sequenceLength  Number of trailing elements to remove. Non-positive
 *                        values remove nothing; values larger than the vector
 *                        empty it.
 */
void popSequence(std::vector<short>& bytes, int sequenceLength)
{
    if (sequenceLength <= 0) return;

    std::size_t count = static_cast<std::size_t>(sequenceLength);
    if (count > bytes.size()) count = bytes.size();
    bytes.resize(bytes.size() - count);
}
int main()
std::vector<short> bytes;
std::ifstream inputFile("input.txt");
int outputFileIndex = 1;
int sequenceLength = 8;
short terminatingSequence[] = { 0x20, 0x00, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00 };
short nextByte;
char buffer[3];
while (inputFile >> buffer)
nextByte = getIntFromHex(buffer);
if (bytes.size() < sequenceLength ||
!findTerminatingSequence(bytes, terminatingSequence, sequenceLength))
popSequence(bytes, sequenceLength);
save(bytes, getFilename(outputFileIndex++), sequenceLength);
save(bytes, getFilename(outputFileIndex), sequenceLength);
return 0;
答案 4 :(得分:0)
use warnings;
use strict;

# Splits binary data read from stdin (or from the files named on the command
# line) into files "file-00000", "file-00001", ... Each output file starts at
# one occurrence of the marker and runs up to the next occurrence, or to the
# end of the data for the last one.

# Slurp entire file from stdin into variable $data.
# $/ must be undefined locally, otherwise <> returns only the first "line".
binmode(STDIN);
my $data = do { local $/; <> };
$data = '' unless defined $data;

# Find offsets of all occurrences of marker in file
my @matches;
my $marker = "\x20\x00\x20\x00\x00\x10\x00\x00";
while ($data =~ /\Q$marker\E/g) {
    # Save offset of this match - you may want to add length($marker) here to avoid including marker in output file
    push @matches, $-[0];
}

# Extract data between pairs of markers and write to file
for my $i (0 .. $#matches) {
    my $start = $matches[$i];
    # The last segment has no following marker; it ends at the end of the data.
    my $end   = $i < $#matches ? $matches[$i + 1] : length($data);
    my $image = substr($data, $start, $end - $start);

    my $filename = sprintf("file-%05d", $i);
    printf("Saving match at offset %d to file %s\n", $start, $filename);

    open(my $out, '>:raw', $filename)
        or die "Cannot open $filename for writing: $!";
    print {$out} $image;
    close($out)
        or die "Cannot close $filename: $!";
}
Saving match at offset 12 to file file-00000
Saving match at offset 44 to file file-00001
./perlscript < binaryData
我或多或少地使用这种技术从相机中恢复受损的闪存卡。您只需在整个闪存卡中搜索一些看起来像JPEG /原始文件开头的字节,然后获取以下10-12MB并将其另存为文件。
答案 5 :(得分:0)
您的问题可以通过实现一个简单的有限状态机(finite state machine)来解决,因为您的条件并不长。您需要读取以空白分隔的十六进制值,并逐个检查这些值是否符合您的条件。如果匹配,则创建一个新文件并继续处理输入流;如果不匹配,则把已经读取的内容写入当前文件。下面是解决方案,读取部分还可以通过修改循环来优化。
#include <fstream>
#include <sstream>
using namespace std;
/**
 * Writes one value as a two-digit, zero-padded hex pair followed by a space,
 * e.g. 0 -> "00 ", 5 -> "05 ", 0xA2 -> "a2 ".
 *
 * @param output  Destination stream. It is left in hexadecimal mode; its fill
 *                character is restored before returning.
 * @param value   Value to write, expected to be in 0..0xFF.
 */
void writeChunk(std::ostream& output, int value) {
    const char previousFill = output.fill('0');
    output << std::hex;
    output.width(2);            // width applies to the next insertion only
    output << value << " ";
    output.fill(previousFill);
}
bool readNext(fstream& input, int& value, stringstream* keep = NULL) {
if (input.eof()) {
return false;
} else {
input >> hex >> value;
if (keep != NULL)
writeChunk(*keep, value);
return true;
/**
 * Builds the name of the output file with the given index.
 *
 * @param count  Index of the output file.
 * @return       "<count>.txt", e.g. "2.txt" for 2.
 */
std::string getFileName(int count) {
    std::stringstream fileName;
    fileName << count << ".txt";
    return fileName.str();
}
// Copies hex values from inputFile to numbered .txt files, opening the next
// numbered file when the eight values 20 00 20 00 00 10 00 00 are read in a row.
// NOTE(review): this listing looks incomplete - no closing braces are present,
// inputFile is never opened, the `if (inputFile.eof())` below has no body of its
// own, and outputFile is re-opened without being closed. Confirm against the
// original answer before relying on it.
int main() {
int fileCount = 1;
// NOTE(review): fileName is not used anywhere in the visible code.
stringstream fileName;
fstream inputFile, outputFile;
outputFile.open(getFileName(fileCount), ios::out);
int hexValue;
while (readNext(inputFile, hexValue)) {
// It won't understand eof until an unsuccessful read, so double checking
if (inputFile.eof())
if (hexValue == 0x20) {
// Collects the values consumed while testing for the marker so they can be
// written out if the match fails part-way through.
stringstream ifFails;
ifFails << "20 ";
if (readNext(inputFile, hexValue, &ifFails) && hexValue == 0x00 &&
readNext(inputFile, hexValue, &ifFails) && hexValue == 0x20 &&
readNext(inputFile, hexValue, &ifFails) && hexValue == 0x00 &&
readNext(inputFile, hexValue, &ifFails) && hexValue == 0x00 &&
readNext(inputFile, hexValue, &ifFails) && hexValue == 0x10 &&
readNext(inputFile, hexValue, &ifFails) && hexValue == 0x00 &&
readNext(inputFile, hexValue, &ifFails) && hexValue == 0x00) {
// Full marker matched: continue in the next numbered output file.
outputFile.open(getFileName(++fileCount), ios::out);
// NOTE(review): as shown, the collected values are written right after a
// successful match; presumably this belongs to a missing else-branch - verify.
outputFile << ifFails.str();
} else {
writeChunk(outputFile, hexValue);
// NOTE(review): returns 1 on the only visible exit path - confirm whether 0 was intended.
return 1;
答案 6 :(得分:0)
你也可以使用 tokenizer:首先把 "myfile" 读入一个字符串。这是必需的,因为在文件上只能得到前向迭代器,而正则表达式需要双向迭代器:
// Copy the whole contents of "myfile" into a string via a temporary ostringstream.
auto const& str(dynamic_cast<ostringstream&> (ostringstream().operator<<(ifstream("myfile").rdbuf())).str());
// NOTE(review): the pattern ".?" matches any single character (or nothing); the
// separator pattern of the original answer may have been lost - verify.
auto const& re(regex(".?", regex_constants::extended));
// Counter used to number the output files, starting at 0.
auto i(0u);
// Submatch index -1 makes the token iterator yield the text between matches;
// each token is written to "<i>.txt".
// NOTE(review): as shown, for_each receives only one iterator before the
// callable; an end iterator argument appears to be missing.
for_each(sregex_token_iterator(str.cbegin(), str.cend(), re, -1),
[&i] (string const& s) {ofstream(to_string(i++) + ".txt") << s; });
55 73 A2 FF
38 5D 9C FF 3A 5E 95 FF