TensorFlow.js 的 CNN 示例很好,于是我决定用自己的自定义字符图像来训练(这些是本地图像,也可以作为浏览器的 img 元素使用)。但是,我无法用自己的数据复现该示例,因为示例代码使用的是预处理过的图像数据。
我复制了此处的示例(https://github.com/tensorflow/tfjs-examples/blob/master/mnist-node/README.md),并安装了所需的 Node.js 程序包,示例可以成功运行。但我发现无法更换示例所使用的数据,因为它加载的是如下所示的预处理数据文件。
// Location and names of the four pre-processed MNIST files the example
// downloads. Presumably each file name is appended to BASE_URL by the loader
// (the download code is not shown here).
const BASE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/';
// Training set: images (idx3-ubyte) and their labels (idx1-ubyte).
const TRAIN_IMAGES_FILE = 'train-images-idx3-ubyte';
const TRAIN_LABELS_FILE = 'train-labels-idx1-ubyte';
// Test set ("t10k"): images and their labels, same two formats as above.
const TEST_IMAGES_FILE = 't10k-images-idx3-ubyte';
const TEST_LABELS_FILE = 't10k-labels-idx1-ubyte';
我按照与 MNIST 相同的规格(28 * 28)制作了图像,所以以为可以直接替换训练和测试数据,但是失败了,因为我不知道 idx3-ubyte 是什么格式。
data.js 文件的 URL 在此处(原文为超链接)。
如何生成相同格式的 ubyte 文件?或者,如何直接使用本地图像或 img 元素?
更新
我查看了 data.js 文件中读取数据的部分,并成功生成了相同格式的文件。该格式在数据之前还带有文件头(header)值。
/**
 * Reads an image file in the idx3-ubyte layout and returns its images.
 *
 * The header (IMAGE_HEADER_BYTES long) is validated first: value 0 must equal
 * IMAGE_HEADER_MAGIC_NUM, values 2 and 3 must equal IMAGE_HEIGHT and
 * IMAGE_WIDTH. Everything after the header is consumed in records of
 * IMAGE_HEIGHT * IMAGE_WIDTH unsigned bytes.
 *
 * @param {string} filename - Name handed to fetchOnceAndSaveToDiskWithBuffer.
 * @returns {Promise<Float32Array[]>} One Float32Array per image, with every
 *     pixel scaled from 0-255 down to 0-1.
 * @throws If a header check fails, or if the number of records read differs
 *     from header value 1.
 */
async function loadImages(filename) {
  const fileBuffer = await fetchOnceAndSaveToDiskWithBuffer(filename);
  const pixelsPerImage = IMAGE_HEIGHT * IMAGE_WIDTH;

  const header = loadHeaderValues(fileBuffer, IMAGE_HEADER_BYTES);
  assert.equal(header[0], IMAGE_HEADER_MAGIC_NUM);
  assert.equal(header[2], IMAGE_HEIGHT);
  assert.equal(header[3], IMAGE_WIDTH);

  const images = [];
  for (let offset = IMAGE_HEADER_BYTES; offset < fileBuffer.byteLength;
       offset += pixelsPerImage) {
    // Each stored byte is 0-255; dividing by 255 maps it into 0-1.
    const pixels = Float32Array.from(
        {length: pixelsPerImage},
        (_, p) => fileBuffer.readUInt8(offset + p) / 255);
    images.push(pixels);
  }

  assert.equal(images.length, header[1]);
  return images;
}
/**
 * Reads a label file in the idx1-ubyte layout and returns its labels.
 *
 * The header (LABEL_HEADER_BYTES long) must start with
 * LABEL_HEADER_MAGIC_NUM. Everything after the header is consumed in records
 * of LABEL_RECORD_BYTE unsigned bytes.
 *
 * @param {string} filename - Name handed to fetchOnceAndSaveToDiskWithBuffer.
 * @returns {Promise<Int32Array[]>} One Int32Array (LABEL_RECORD_BYTE long)
 *     per label record, holding the raw byte values.
 * @throws If the magic number check fails, or if the number of records read
 *     differs from header value 1.
 */
async function loadLabels(filename) {
  const fileBuffer = await fetchOnceAndSaveToDiskWithBuffer(filename);

  const header = loadHeaderValues(fileBuffer, LABEL_HEADER_BYTES);
  assert.equal(header[0], LABEL_HEADER_MAGIC_NUM);

  const labels = [];
  for (let offset = LABEL_HEADER_BYTES; offset < fileBuffer.byteLength;
       offset += LABEL_RECORD_BYTE) {
    const record = Int32Array.from(
        {length: LABEL_RECORD_BYTE},
        (_, b) => fileBuffer.readUInt8(offset + b));
    labels.push(record);
  }

  assert.equal(labels.length, header[1]);
  return labels;
}
getData_(isTrainingData) {
let imagesIndex;
let labelsIndex;
if (isTrainingData) {
imagesIndex = 0;
labelsIndex = 1;
} else {
imagesIndex = 2;
labelsIndex = 3;
}
const size = this.dataset[imagesIndex].length;
tf.util.assert(
this.dataset[labelsIndex].length === size,
`Mismatch in the number of images (${size}) and ` +
`the number of labels (${this.dataset[labelsIndex].length})`);
// Only create one big array to hold batch of images.
const imagesShape = [size, IMAGE_HEIGHT, IMAGE_WIDTH, 1];
const images = new Float32Array(tf.util.sizeFromShape(imagesShape));
const labels = new Int32Array(tf.util.sizeFromShape([size, 1]));
let imageOffset = 0;
let labelOffset = 0;
for (let i = 0; i < size; ++i) {
images.set(this.dataset[imagesIndex][i], imageOffset);
labels.set(this.dataset[labelsIndex][i], labelOffset);
imageOffset += IMAGE_FLAT_SIZE;
labelOffset += 1;
}
return {
images: tf.tensor4d(images, imagesShape),
labels: tf.oneHot(tf.tensor1d(labels, 'int32'), LABEL_FLAT_SIZE).toFloat()
};
}
}
下面是生成器代码。
const {createCanvas, loadImage} = require('canvas');
const tf = require('@tensorflow/tfjs');
require('@tensorflow/tfjs-node');
const fs = require('fs');
const util = require('util');
// const writeFile = util.promisify(fs.writeFile);
// const readFile = util.promisify(fs.readFile);
/**
 * Generates a small MNIST-style dataset from five local 28x28 glyph images
 * and writes it as two files: './testGeneratedImageBuffer' (image records)
 * and './testGeneratedLabelBuffer' (label records).
 *
 * File layout (matches what loadImages / loadLabels read):
 *   images: four big-endian uint32 header values [2051, count, 28, 28],
 *           then count * 784 unsigned bytes, one per pixel;
 *   labels: two big-endian uint32 header values [2049, count],
 *           then count unsigned bytes, one per label.
 *
 * A label value is also the index of the glyph stored for that record.
 * Any failure is logged and the process exit code is set to 1.
 */
(async () => {
  const side = 28;
  const imageHeaderBytes = 16;
  const imageRecordBytes = side * side;
  const labelHeaderBytes = 8;
  const labelRecordBytes = 1;

  // Glyph i is the image stored for every record labelled i.
  const glyphPaths = ['./u.png', './q.png', './r.png', './c.png', './z.png'];
  const labels = [0, 1, 3, 2, 4, 0, 1, 2, 1, 0, 3, 0, 2, 3, 4, 0];

  const canvas = createCanvas(side, side);
  const ctx = canvas.getContext('2d');

  const glyphBuffers = [];
  for (const glyphPath of glyphPaths) {
    const glyph = await loadImage(glyphPath);

    // One canvas is reused for every glyph. drawImage composites over what
    // is already there, so without clearing, transparent or undersized
    // glyphs would keep pixels from the previously drawn one.
    ctx.clearRect(0, 0, side, side);
    ctx.drawImage(glyph, 0, 0);

    // Single-channel read: one value per pixel.
    const pixels = tf.fromPixels(canvas, 1);
    const bytes = Buffer.from(await pixels.data());
    pixels.dispose();

    if (bytes.byteLength !== imageRecordBytes) {
      throw new Error(
          `${glyphPath}: expected ${imageRecordBytes} pixel bytes, ` +
          `got ${bytes.byteLength}`);
    }
    glyphBuffers.push(bytes);
  }

  const size = labels.length;
  const imageBuffer = Buffer.alloc(imageHeaderBytes + size * imageRecordBytes);
  const labelBuffer = Buffer.alloc(labelHeaderBytes + size * labelRecordBytes);

  // Header data is stored in-order (aka big-endian).
  [2051, size, side, side].forEach((value, i) => {
    imageBuffer.writeUInt32BE(value, i * 4);
  });
  [2049, size].forEach((value, i) => {
    labelBuffer.writeUInt32BE(value, i * 4);
  });

  labels.forEach((label, i) => {
    glyphBuffers[label].copy(
        imageBuffer, imageHeaderBytes + i * imageRecordBytes);
    labelBuffer.writeUInt8(label, labelHeaderBytes + i * labelRecordBytes);
  });

  fs.writeFileSync('./testGeneratedImageBuffer', imageBuffer);
  fs.writeFileSync('./testGeneratedLabelBuffer', labelBuffer);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
答案 0 :(得分:2)
“ubyte”代表“无符号字节”(unsigned byte),即无符号的 8 位整数。两个 images-idx3-ubyte 文件(train-images-idx3-ubyte 和 t10k-images-idx3-ubyte)各自包含一系列无符号的 8 位整数。每个整数对应 MNIST 图像中的一个像素,取值范围为 0 到 255(含两端)。
这就是在像素级别上表示图像的方式。现在,让我们看一下由28行和28列组成的整个图像的级别。需要28 * 28 = 784这样的整数来表示图像。在文件中,它们的组织方式使得前28个整数对应于第一行,后28个整数对应于第二行,依此类推。
数据集中的所有图像都以这种方式表示,它们的整数依次连接起来,就构成了 images-idx3-ubyte 文件的内容。为什么会有两个这样的文件?因为 train-images-idx3-ubyte 是训练数据集,而 t10k-images-idx3-ubyte 是测试数据集。
另外两个文件(labels-idx1-ubyte)是 MNIST 图像的标签。与 images-idx3-ubyte 文件一样,它们也由 uint8(即无符号的 8 位整数)组成。但是,由于 MNIST 数据集中只有 10 个图像类别,标签文件中的值不会用满 0 到 255 的范围,而只取 0 到 9(含两端)。
希望这很清楚。