tensorflow.js损失无穷大

时间:2018-05-01 07:17:07

标签: tensorflow tensorflow.js

我正在尝试使用tensorflow.js模型创建一个简单的项目来查找方程的系数。然而,当跑步时,损失接近无穷大并且在4次左右的迭代中变成NaN。我不知道为什么会这样。这是我的代码:

let xs = [];
let ys = [];

let aReal = Math.random();
let bReal = Math.random();
let cReal = Math.random();
let dReal = Math.random();

for (let i = -100; i < 100; i+=1) {
    xs.push(i);
    ys.push((aReal*Math.pow(i, 3) + bReal*Math.pow(i, 2) + cReal*i + dReal) + Math.random()*10-1);
}

const a = tf.variable(tf.scalar(Math.random()));
const b = tf.variable(tf.scalar(Math.random()));
const c = tf.variable(tf.scalar(Math.random()));
const d = tf.variable(tf.scalar(Math.random()));



function predict(x) {
  return tf.tidy(() => {
    return a.mul(x.pow(tf.scalar(3, 'int32')))
      .add(b.mul(x.square()))
      .add(c.mul(x))
      .add(d);
  });
}

function loss(predictions, labels) {
  const meanSquareError = predictions.sub(labels).square().mean();
  print(meanSquareError.dataSync());
  return meanSquareError;
}

function train(xS, yS, numIterations) {
  const learningRate = 0.1;
  const optimizer = tf.train.sgd(learningRate);

  console.log(xS.dataSync(), yS.dataSync());

  for (let iter = 0; iter < numIterations; iter++) {
    optimizer.minimize(() => {
      const predYs = predict(xS);
      return loss(predYs, yS);
    });

  }
}

train(tf.tensor(xs), tf.tensor(ys), 100);

let yPred = predict(tf.tensor(xs)).dataSync();

console.log(yPred);

let trace1 = {
    x: xs,
    y: ys,
    mode: 'markers',
    type: 'scatter'
};

let trace2 = {
  x: xs,
  y: yPred,
  mode: 'lines',
};

console.log(aReal, bReal, cReal, dReal);
console.log(a.dataSync(), b.dataSync(), c.dataSync(), d.dataSync());

let graphData = [trace1, trace2];

Plotly.newPlot('graph', graphData);

Plotly只是一个用于绘制数据的js库。

2 个答案:

答案 0 :(得分:1)

尝试降低学习率。一旦它稳定,你可以调整它以加速训练。如果它太高你会得到不稳定和NaNs

const learningRate = 0.0001;

答案 1 :(得分:0)

您应该尝试规范化输入数据,以使预测正常工作。否则,优化会在数值上变得不稳定。

ys = [...];
// compute mean and stdev for ys!
normalized = (ys-ysmean)/(ysstd);
train(xs, normalized);
normed_pred = predict(xs);
pred = ysstd*normed_pred+ysmean;

在我运行的测试中,您的代码在线性模型y=ax+b上工作得很完美;因此我的结论。