Using categorical cross-entropy as the loss function with Optimizer.minimize - tensorflow.js

Time: 2020-05-06 15:22:45

Tags: javascript machine-learning tensorflow.js

The following example comes (roughly) from the tensorflow.js tutorials; the code used to train the model is:

// The weights and biases for the two dense layers.
const w1 = tf.variable(tf.randomNormal([784, 32]));
const b1 = tf.variable(tf.randomNormal([32]));
const w2 = tf.variable(tf.randomNormal([32, 10]));
const b2 = tf.variable(tf.randomNormal([10]));

function model(x) {
  return x.matMul(w1).add(b1).relu().matMul(w2).add(b2);
}
const xs = tf.data.generator(data);
const ys = tf.data.generator(labels);
// Zip the data and labels together, shuffle and batch 32 samples at a time.
const ds = tf.data.zip({xs, ys}).shuffle(100 /* bufferSize */).batch(32);

const optimizer = tf.train.sgd(0.1 /* learningRate */);
// Train for 5 epochs.
for (let epoch = 0; epoch < 5; epoch++) {
  await ds.forEachAsync(({xs, ys}) => {
    optimizer.minimize(() => {
      const predYs = model(xs);
      const loss = tf.losses.softmaxCrossEntropy(ys, predYs);
      loss.data().then(l => console.log('Loss', l));
      return loss;
    });
  });
  console.log('Epoch', epoch);
}
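
For context, as far as I can tell the tutorial loss works because the model above returns raw logits (there is no softmax layer) and tf.losses.softmaxCrossEntropy reduces the whole batch to a single scalar, which is what optimizer.minimize expects its callback to return. A tiny standalone check with made-up tensors (not my real data):

// Hypothetical values, only to show the output shape.
const labels = tf.tensor2d([[0, 1, 0], [1, 0, 0]]);             // one-hot labels
const logits = tf.tensor2d([[1.0, 3.0, 0.5], [2.0, 0.1, 0.3]]); // raw model outputs, no softmax

// softmaxCrossEntropy applies the softmax internally and, with its default
// reduction, returns a single scalar loss for the batch.
const batchLoss = tf.losses.softmaxCrossEntropy(labels, logits);
batchLoss.print(); // prints one number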

The two main changes between this model and mine are: 1) I want to use categorical cross-entropy as the loss function, and 2) I am using let model = tf.sequential() rather than writing a function for the model.

My model is constructed as follows:

function getModel() {
    const model = tf.sequential();
    const IMAGE_WIDTH = 28;
    const IMAGE_HEIGHT = 28;
    const IMAGE_CHANNELS = 1;  


    model.add(tf.layers.conv2d({
      inputShape: [IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS],
      kernelSize: 5,
      filters: 8,
      strides: 1,
      activation: 'relu',
      kernelInitializer: 'varianceScaling'
    }));

    // The MaxPooling layer acts as a sort of downsampling using max values
    // in a region instead of averaging.  
    model.add(tf.layers.maxPooling2d({poolSize: [2, 2], strides: [2, 2]}));

    // Repeat another conv2d + maxPooling stack. 
    // Note that we have more filters in the convolution.
    model.add(tf.layers.conv2d({
      kernelSize: 5,
      filters: 16,
      strides: 1,
      activation: 'relu',
      kernelInitializer: 'varianceScaling'
    }));
    model.add(tf.layers.maxPooling2d({poolSize: [2, 2], strides: [2, 2]}));

    // Now we flatten the output from the 2D filters into a 1D vector to prepare
    // it for input into our last layer. This is common practice when feeding
    // higher dimensional data to a final classification output layer.
    model.add(tf.layers.flatten());

    // Our last layer is a dense layer which has 10 output units, one for each
    // output class (i.e. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9).
    const NUM_OUTPUT_CLASSES = 10;
    model.add(tf.layers.dense({
      units: NUM_OUTPUT_CLASSES,
      kernelInitializer: 'varianceScaling',
      activation: 'softmax'
    }));


    // Choose an optimizer, loss function and accuracy metric,
    // then compile and return the model
    const optimizer = tf.train.adam();
    model.compile({
      optimizer: optimizer,
      loss: 'categoricalCrossentropy',
      metrics: ['accuracy'],
    });

    return model;
  }

The training code is:

let input = getNextTrainBatch(BATCH_SIZE, TRAIN_DATA_SIZE, data);
model.optimizer.minimize(() => {
  const predict = model.apply(input[0]);
  const loss = tf.metrics.categoricalCrossentropy(input[1], predict);
  return loss;
});

With this setup, when I run the code I get the error Uncaught (in promise) Error: Tensor is disposed. If I move model.apply(input[0]) outside of model.optimizer.minimize, I instead get the error

Uncaught (in promise) Error: Cannot find a connection between any variable and the result of the loss function y=f(x). Please make sure the operations that use variables are inside the function f passed to minimize().

I think the second error is because the categorical cross-entropy I'm calling lives under tf.metrics rather than tf.losses, but there is no categorical cross-entropy in the tf.losses namespace. I'm not sure what to do here, or whether I can even use categorical cross-entropy for this.
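
To narrow this down, here is a small standalone comparison with made-up tensors (assuming one-hot labels and softmax outputs, which is what my model produces). My understanding is that tf.metrics.categoricalCrossentropy returns one value per example, while minimize needs its callback to return a single scalar, so some reduction such as .mean() would presumably be needed; I haven't confirmed that this is the actual problem:

const yTrue = tf.tensor2d([[0, 1, 0], [1, 0, 0]]);             // one-hot labels
const yPred = tf.tensor2d([[0.1, 0.8, 0.1], [0.6, 0.3, 0.1]]); // softmax probabilities

// One cross-entropy value per example (shape [2]), not a scalar.
const perExample = tf.metrics.categoricalCrossentropy(yTrue, yPred);
perExample.print();

// My guess at the reduction minimize would want; untested against the errors above.
const scalarLoss = perExample.mean();
scalarLoss.print();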

Note: in case the version matters, I am using "@tensorflow/tfjs": "1.0.2" for tensorflow in my dependencies.

Edit: I realized that, rather than an actual loss function, I need to use tf.metrics.categoricalCrossentropy in place of tf.losses.softmaxCrossEntropy. However, despite this change I am still getting the same two errors as before.

0 Answers:

No answers yet.