我正在尝试读取 ubyte 格式的文件。我已确认自己使用的是小端机器,并把读到的字节转换成了小端序。但问题是,转换之后我在输出文件中看到了大于 255 的数字。
有什么想法吗?我的代码取自《How to read MNIST data in C++?》这个问题,并做了一些修改,下面是我的代码:
// Fragment from the question: reads a header from `fin`, a header from `fin2`,
// logs both, then writes one output line per image to `fout`.
// The streams fin, fin2, log and fout are declared outside this fragment.
if (fin.is_open())
{
// Four int-sized header fields are read from fin in this order; each one is
// passed through flipBytes() (byte-order reversal) immediately after reading.
int magic_number=0;
int number_of_images=0;
int n_rows=0;
int n_cols=0;
fin.read((char*)&magic_number,sizeof(magic_number));
magic_number= flipBytes(magic_number);
fin.read((char*)&number_of_images,sizeof(number_of_images));
number_of_images= flipBytes(number_of_images);
fin.read((char*)&n_rows,sizeof(n_rows));
n_rows= flipBytes(n_rows);
fin.read((char*)&n_cols,sizeof(n_cols));
n_cols= flipBytes(n_cols);
if(fin2.is_open())
{
// Two int-sized header fields are read from fin2 and byte-swapped the same way.
int magic_number_label = 0;
int number_of_items = 0;
fin2.read((char*)&magic_number_label, sizeof(magic_number_label));
magic_number_label = flipBytes(magic_number_label);
fin2.read((char*)&number_of_items, sizeof(number_of_items));
number_of_items = flipBytes(number_of_items);
// Record both headers in the log stream.
log << "Magic Number: " << magic_number << "\n";
log << "Number of Images: " << number_of_images << "\n";
log << "Number of Rows: " << n_rows << "\n";
log << "Number of Cols: " << n_cols << "\n";
log << "------------------------------\n";
log << "Magic Number Label: " << magic_number_label << "\n";
log << "Number of Items: " << number_of_items << "\n";
// One output line per image: n_rows * n_cols values separated by ", ",
// followed by one value read from fin2 and a newline.
for(int i=0;i<number_of_images;++i)
{
for(int r=0;r<n_rows;++r)
{
for(int c=0;c<n_cols;++c)
{
// NOTE(review): this reads sizeof(unsigned int) bytes for a single value, so
// several consecutive file bytes are combined into one number. That is the
// likely cause of printed values above 255 -- confirm against the file format.
unsigned int temp=0;
fin.read((char*)&temp,sizeof(temp));
temp = flipBytes(temp);
fout << temp << ", ";
}
}
// Append the value read from fin2 as the last field of the line.
// NOTE(review): this is also a sizeof(unsigned int)-byte read, and temp is
// not initialised before the read -- confirm the intended element width.
unsigned int temp;
fin2.read((char*)&temp, sizeof(temp));
temp = flipBytes(temp);
fout << temp << '\n';
}
}
}
/// Reverse the order of the four low-order bytes of @p i.
///
/// Byte 0 (least significant) is swapped with byte 3 and byte 1 with byte 2,
/// which converts a 32-bit value between big-endian and little-endian layout.
/// Only four bytes are handled, so this assumes a 32-bit int.
///
/// @param i value whose bytes are to be reversed
/// @return the byte-reversed value
int UbyteExtractor::flipBytes(int i)
{
    // Do all shifting on an unsigned copy: shifting a byte >= 0x80 into the
    // sign bit of a signed int, or right-shifting a negative int, is not
    // portable, whereas unsigned shifts are fully defined.
    const unsigned int bits = static_cast<unsigned int>(i);
    const unsigned int flipped = ((bits & 0x000000FFu) << 24)
                               | ((bits & 0x0000FF00u) << 8)
                               | ((bits >> 8) & 0x0000FF00u)
                               | ((bits >> 24) & 0x000000FFu);
    return static_cast<int>(flipped);
}
示例输出片段(snippet):
0 0 0 0 0 0 303174147 447711358 2146959270 0 0 0 2589860894 4261281194 2900491773 1086583549 0 0 822083584 4261281262 4261281277 1381891069 2570322
答案 0 :(得分:1)
@mrgloom 的原始答案每次只读取 1 个字节,而您每次从文件中读取 4 个字节。这是一个巨大的差异。
mrgloom:
// temp is an unsigned char, so sizeof(temp) is 1: exactly one byte is read.
unsigned char temp=0;
file.read((char*)&temp,sizeof(temp));
您:
// temp is an unsigned int, so sizeof(unsigned int) bytes are read at once,
// byte-swapped by flipBytes(), and printed as a single combined number.
unsigned int temp=0;
fin.read((char*)&temp,sizeof(temp));
temp = flipBytes(temp);
fout << temp << ", ";
也许您是想把像素的数值打印到控制台?那么请在打印之前把字节值转换为 (unsigned) int:
// Read a single byte, then convert it to unsigned before streaming so that it
// is written as a number rather than as a character.
unsigned char temp=0;
fin.read((char*)&temp,sizeof(temp));
fout << unsigned(temp) << ", ";
顺便说一下,仅供参考,已有现成的库函数可用于字节序转换,例如 <arpa/inet.h> 中的 ntohl(下面的代码就使用了它)。
下面是一个可以正常工作的解决方案(基本上与 @mrgloom 的代码相同):
#include <iostream>
#include <fstream>
using namespace std;
#include <arpa/inet.h>
// Report an I/O failure on the local stream named `file`, then return from
// the enclosing void function.  Wrapped in do { } while (0) so that
// `ERROR();` is a single statement and is safe after an unbraced `if`.
// The stream state is cleared before tellg() is called because tellg()
// yields -1 whenever failbit is set, which would hide the actual offset.
#define ERROR() do { \
    file.clear(); \
    std::cout << "I/O error at " << __func__ << ": " << __LINE__ << \
        " (offset " << file.tellg() << ")" << std::endl; \
    return; \
} while (0)

/// Print the header and every image of an image file to standard output.
///
/// The file is read as four int-sized header fields (magic number, image
/// count, row count, column count), each converted from network byte order
/// with ntohl(), followed by one unsigned byte per pixel.  Every image is
/// printed as a bracketed block with one bracketed, space-separated line per
/// row.  On any open or read failure a diagnostic is printed via ERROR() and
/// the function returns early.
///
/// @param full_path path of the file to read
static void read_mnist(const std::string &full_path)
{
    std::ifstream file(full_path.c_str(), std::ios::binary);
    if (!file)
        ERROR();

    int magic_number = 0;
    int number_of_images = 0;
    int n_rows = 0;
    int n_cols = 0;

    // Each read is checked separately so that the line number printed by
    // ERROR() identifies the header field that could not be read.
    if (!file.read(reinterpret_cast<char *>(&magic_number), sizeof(magic_number)))
        ERROR();
    magic_number = static_cast<int>(ntohl(magic_number));

    if (!file.read(reinterpret_cast<char *>(&number_of_images), sizeof(number_of_images)))
        ERROR();
    number_of_images = static_cast<int>(ntohl(number_of_images));

    if (!file.read(reinterpret_cast<char *>(&n_rows), sizeof(n_rows)))
        ERROR();
    n_rows = static_cast<int>(ntohl(n_rows));

    if (!file.read(reinterpret_cast<char *>(&n_cols), sizeof(n_cols)))
        ERROR();
    n_cols = static_cast<int>(ntohl(n_cols));

    std::cout << "Mgc==" << magic_number << "; NImg==" << number_of_images << "; "
                 "NRow==" << n_rows << "; NCol==" << n_cols << std::endl;

    // '\n' instead of std::endl inside the loops: the bytes written are the
    // same, but the stream is not flushed once per row.
    for (int i = 0; i < number_of_images; ++i)
    {
        std::cout << "[\n";
        for (int r = 0; r < n_rows; ++r)
        {
            std::cout << "\t[";
            for (int c = 0; c < n_cols; ++c)
            {
                // One byte per pixel; widen before printing so the value is
                // shown as a number, not as a character.
                unsigned char pixel = 0;
                if (!file.read(reinterpret_cast<char *>(&pixel), sizeof(pixel)))
                    ERROR();
                std::cout << static_cast<unsigned>(pixel) << ' ';
            }
            std::cout << "]\n";
        }
        std::cout << "]\n";
    }
}
/// Program entry point: dumps the image file named by the first command-line
/// argument using read_mnist().
///
/// @return 0 after processing, 1 when no file argument was supplied
int main(int argc, char *argv[]) {
    // argv[1] is only a valid string when at least one argument was passed;
    // streaming or converting a null pointer would be undefined behaviour.
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "read_mnist")
                  << " <idx-image-file>" << std::endl;
        return 1;
    }
    std::cout << "Process [" << argv[1] << "]" << std::endl;
    read_mnist(argv[1]);
    return 0;
}
运行记录:
$ wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
(snip)
$ gunzip t10k-images-idx3-ubyte.gz
$ ./mrgloom t10k-images-idx3-ubyte
Process [t10k-images-idx3-ubyte]
Mgc==2051; NImg==10000; NRow==28; NCol==28
[
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 84 185 159 151 60 36 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 222 254 254 254 254 241 198 198 198 198 198 198 198 198 170 52 0 0 0 0 0 0 ]
[0 0 0 0 0 0 67 114 72 114 163 227 254 225 254 254 254 250 229 254 254 140 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 17 66 14 67 67 67 59 21 236 254 106 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 83 253 209 18 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 233 255 83 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 129 254 238 44 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59 249 254 62 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 133 254 187 5 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 205 248 58 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 126 254 182 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 75 251 240 57 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 19 221 254 166 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 3 203 254 219 35 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 38 254 254 77 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 31 224 254 115 1 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 133 254 254 52 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 61 242 254 254 52 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 121 254 254 219 40 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 121 254 207 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
]
[
(snip)