训练数据归一化后如何预测?

时间:2019-12-07 15:24:00

标签: tensorflow.js

我正在使用官方文档学习TensorFlow.js,并修改了Codelab中给出的代码,使其在输入千克(kg)值时输出对应的磅值。

所以我有一个run函数,该函数在加载DOM时运行。

/**
 * Entry point: builds the model, trains it on the generated kg -> lb data,
 * then predicts the value for a single input (5) and prints the result.
 *
 * The prediction input goes through normalizeData, which derives its min/max
 * from the values passed to it, not from the training data.
 *
 * @returns {Promise<void>} Resolves once the prediction has been printed.
 */
async function run() {
    const model = createModel();
    const data = createData();
    const tensorData = convertToTensor(data);
    const { inputs, labels } = tensorData;

    // Train Model
    await trainModel(model, inputs, labels);
    console.log('Training Complete');

    // Predict
    const normalizedPredictData = normalizeData([5]);
    const { normalizedPredictDataVal, predictValMax, predictValMin } = normalizedPredictData;
    const output = model.predict(normalizedPredictDataVal);
    const finalOutput = unNormalizeData(output, predictValMax, predictValMin);

    // print() writes the tensor to the console itself and returns nothing,
    // so wrapping it in console.log only added a stray `undefined` line.
    finalOutput.print();
}

// Kick everything off once the DOMContentLoaded event fires.
document.addEventListener('DOMContentLoaded', run);

createModel创建一个具有2层的简单顺序模型-一个隐藏层和一个输出层。

/**
 * Builds the network used for the conversion: a sequential model made of two
 * dense layers with one unit each. The first layer accepts a single input
 * value per sample.
 *
 * @returns {tf.Sequential} The assembled, not yet compiled, model.
 */
function createModel() {
    const hiddenLayer = tf.layers.dense({ units: 1, inputShape: [1] });
    const outputLayer = tf.layers.dense({ units: 1 });

    const network = tf.sequential();
    network.add(hiddenLayer);
    network.add(outputLayer);

    return network;
}

createData是一个可生成500个值进行训练的函数。

/**
 * Generates the training set: 500 inputs (0..499) and, for each of them,
 * a label equal to the input multiplied by 2.2.
 *
 * @returns {{inputs: number[], labels: number[]}} Parallel arrays of inputs
 *     and labels; labels[i] corresponds to inputs[i].
 */
function createData() {
    const sampleCount = 500;
    const inputs = [];
    const labels = [];

    for (let kg = 0; kg < sampleCount; kg += 1) {
        inputs.push(kg);
        labels.push(kg * 2.2);
    }

    return { inputs, labels };
}

输入范围是0到499,标签只是输入* 2.2,因为我想预测将kg值作为输入时的磅数。

convertToTensor函数将生成的数据转换为张量后对其进行归一化。

/**
 * Converts the raw training arrays into min-max normalized 2-D tensors.
 *
 * @param {{inputs: number[], labels: number[]}} data Parallel arrays of
 *     inputs and labels. The arrays themselves are not modified.
 * @returns {{inputs: tf.Tensor, labels: tf.Tensor, inputMax: tf.Tensor,
 *     inputMin: tf.Tensor, labelMax: tf.Tensor, labelMin: tf.Tensor}}
 *     The normalized tensors (shape [n, 1]) together with the statistics
 *     needed to normalize new inputs and un-normalize predictions.
 */
function convertToTensor(data) {
    return tf.tidy(() => {
        // tf.util.shuffle works on arrays; the plain `data` object has no
        // length, so passing it shuffled nothing. Shuffle one list of indices
        // instead and apply it to both arrays so each input keeps its label.
        const order = data.inputs.map((_, i) => i);
        tf.util.shuffle(order);

        const inputs = order.map((i) => data.inputs[i]);
        const labels = order.map((i) => data.labels[i]);

        const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]);
        const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

        // Normalize Data: every statistic is taken from its own tensor.
        // (The label statistics were previously read from inputTensor.)
        const inputMax = inputTensor.max();
        const inputMin = inputTensor.min();
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
        const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

        return {
            inputs: normalizedInputs,
            labels: normalizedLabels,
            inputMax,
            inputMin,
            labelMax,
            labelMin
        };
    });
}

最后使用trainModel对数据进行训练:
/**
 * Compiles the model (Adam optimizer, mean squared error loss) and fits it to
 * the given tensors for 50 epochs in batches of 32, reporting 'loss' and
 * 'mse' to a tfjs-vis chart named 'Training Performance'.
 *
 * @param {tf.LayersModel} model Model to compile and train.
 * @param {tf.Tensor} inputs Training inputs.
 * @param {tf.Tensor} labels Training labels.
 * @returns {Promise<tf.History>} Whatever model.fit resolves to.
 */
async function trainModel(model, inputs, labels) {
    const compileOptions = {
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse']
    };
    model.compile(compileOptions);

    // Chart callbacks, refreshed at the end of every epoch.
    const chartCallbacks = tfvis.show.fitCallbacks(
        { name: 'Training Performance' },
        ['loss', 'mse'],
        { height: 200, callbacks: ['onEpochEnd'] }
    );

    const history = await model.fit(inputs, labels, {
        batchSize: 32,
        epochs: 50,
        shuffle: true,
        callbacks: chartCallbacks
    });

    return history;
}

数据训练完成后,接下来就该进行预测了。由于模型是用归一化后的值训练的,所以我只把归一化后的输入值传给预测函数(predict)。

/**
 * Turns the given values into an [n, 1] tensor and min-max normalizes it
 * using the minimum and maximum of those same values.
 *
 * For a single-element input the range (max - min) is zero, so the
 * normalization divides zero by zero.
 *
 * @param {number[]} value Values to normalize.
 * @returns {{normalizedPredictDataVal: tf.Tensor, predictValMax: tf.Tensor,
 *     predictValMin: tf.Tensor}} The normalized tensor and the statistics
 *     that were used to produce it.
 */
function normalizeData(value) {
    const columnShape = [value.length, 1];
    const predictValTensor = tf.tensor2d(value, columnShape);

    const predictValMax = predictValTensor.max();
    const predictValMin = predictValTensor.min();

    const shifted = predictValTensor.sub(predictValMin);
    const range = predictValMax.sub(predictValMin);

    return {
        normalizedPredictDataVal: shifted.div(range),
        predictValMax,
        predictValMin
    };
}

上面的函数将值转换为张量并对其进行归一化,然后返回结果;该结果随后被传给预测函数以得到输出值。由于输入经过了归一化,输出也需要进行反归一化,因此我又写了一个函数来完成反归一化。

/**
 * Reverses a min-max normalization: value * (predictMax - predictMin) + predictMin.
 *
 * @param {tf.Tensor} value Normalized tensor to scale back.
 * @param {tf.Tensor} predictMax Maximum used as the upper end of the range.
 * @param {tf.Tensor} predictMin Minimum used as the lower end of the range.
 * @returns {tf.Tensor} The rescaled tensor.
 */
function unNormalizeData(value, predictMax, predictMin) {
    const range = predictMax.sub(predictMin);
    return value.mul(range).add(predictMin);
}

输出经过反归一化之后,我只是把它打印到控制台。但它输出的只是我作为输入给出的值,在本例中就是5。

enter image description here

直到训练数据这一步为止,代码都工作正常。我认为错误出在我对预测值进行归一化和反归一化的地方。

1 个答案:

答案 0 :(得分:0)

应该使用训练样本的最大值和最小值对预测值进行归一化。

predictValMax(以及predictValMin)不应该与inputMax(以及inputMin)不同。也就是说,预测时应直接使用训练集的inputMax和inputMin,而不是像下面这样根据预测值本身重新计算:

const predictValMax = predictValTensor.max();
const predictValMin = predictValTensor.min();

对同一个特征值的预测不应随它所在的数据集而改变。假设训练特征集为:

[-5, 5], inputMin = -5, inputMax = 5; normalized = [0, 1]

再给出下面这两个测试特征集:

[5, 6], predictMin = 5, predictMax = 6; normalized = [0, 1]; 

[5], predictMin = 5, predictMax = 5; normalized = [0/0] // 此处会出现除以零,结果未定义

测试集中5的归一化值不同。它也与训练数据中的标准化值不同。该模型将在每次出现相同特征5时预测不同的值,因为其归一化值取决于数据集。

如果对每个预测值都应用相同的归一化参数(inputMin、inputMax),就不会出现这种情况。

/**
 * Creates the sequential model: two dense layers of one unit each, the first
 * of which declares an input shape of [1].
 *
 * @returns {tf.Sequential} The assembled, not yet compiled, model.
 */
function createModel() {
    const layerConfigs = [
        { units: 1, inputShape: [1] }, // hidden layer
        { units: 1 }                   // output layer
    ];

    const model = tf.sequential();
    for (const config of layerConfigs) {
        model.add(tf.layers.dense(config));
    }

    return model;
}

// Min/max statistics of the training data. They are assigned inside
// convertToTensor so that the prediction code can reuse them later.
let inputMin;
let inputMax;
let labelMin;
let labelMax;

/**
 * Produces the training set: the integers 0..499 as inputs and each input
 * multiplied by 2.2 as the matching label.
 *
 * @returns {{inputs: number[], labels: number[]}} Parallel arrays of inputs
 *     and labels; labels[i] corresponds to inputs[i].
 */
function createData() {
    const inputs = [...Array(500).keys()];
    const labels = inputs.map((kg) => kg * 2.2);

    return { inputs, labels };
}

/**
 * Converts the raw training arrays into min-max normalized 2-D tensors and
 * stores the statistics in the module-level inputMin / inputMax / labelMin /
 * labelMax variables so prediction code can reuse them.
 *
 * @param {{inputs: number[], labels: number[]}} data Parallel arrays of
 *     inputs and labels. The arrays themselves are not modified.
 * @returns {{inputs: tf.Tensor, labels: tf.Tensor, inputMax: tf.Tensor,
 *     inputMin: tf.Tensor, labelMax: tf.Tensor, labelMin: tf.Tensor}}
 *     The normalized tensors (shape [n, 1]) and the statistics used.
 */
function convertToTensor(data) {
    return tf.tidy(() => {
        // tf.util.shuffle works on arrays; the plain `data` object has no
        // length, so passing it shuffled nothing. Shuffle one list of indices
        // instead and apply it to both arrays so each input keeps its label.
        const order = data.inputs.map((_, i) => i);
        tf.util.shuffle(order);

        const inputs = order.map((i) => data.inputs[i]);
        const labels = order.map((i) => data.labels[i]);

        const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]);
        const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

        inputTensor.print();
        labelTensor.print();

        // Normalize Data: every statistic is taken from its own tensor.
        // (The label statistics were previously read from inputTensor.)
        // The statistics are part of the returned object, so tf.tidy keeps
        // them alive for later use through the module-level variables.
        inputMax = inputTensor.max();
        inputMin = inputTensor.min();
        labelMax = labelTensor.max();
        labelMin = labelTensor.min();

        const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
        const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

        return {
            inputs: normalizedInputs,
            labels: normalizedLabels,
            inputMax,
            inputMin,
            labelMax,
            labelMin
        };
    });
}

/**
 * Compiles the model (SGD with learning rate 0.01, mean squared error loss)
 * and fits it to the given tensors for 200 epochs in batches of 32, reporting
 * 'loss' and 'mse' to a tfjs-vis chart named 'Training Performance'.
 *
 * @param {tf.LayersModel} model Model to compile and train.
 * @param {tf.Tensor} inputs Normalized training inputs.
 * @param {tf.Tensor} labels Normalized training labels.
 * @returns {Promise<tf.History>} Whatever model.fit resolves to.
 */
async function trainModel(model, inputs, labels) {
    // Plain SGD is used here in place of tf.train.adam().
    const learningRate = 0.01;
    const optimizer = tf.train.sgd(learningRate);

    model.compile({
        optimizer,
        loss: tf.losses.meanSquaredError,
        metrics: ['mse']
    });

    const batchSize = 32;
    const epochs = 200;

    // Show the tensors that are actually fed to the model.
    inputs.print();
    labels.print();

    return await model.fit(inputs, labels, {
        batchSize,
        epochs,
        shuffle: true,
        callbacks: tfvis.show.fitCallbacks(
            { name: 'Training Performance' },
            ['loss', 'mse'],
            { height: 200, callbacks: ['onEpochEnd'] }
        )
    });
}

/**
 * Normalizes values to predict on with the training-set statistics
 * (module-level inputMin / inputMax), so a given value always maps to the
 * same normalized number regardless of what else is in `value`.
 *
 * convertToTensor must have run first, since it assigns those statistics.
 *
 * @param {number[]} value Values to normalize.
 * @returns {{normalizedPredictDataVal: tf.Tensor, inputMax: tf.Tensor,
 *     inputMin: tf.Tensor}} The normalized [n, 1] tensor and the statistics
 *     that were applied.
 */
function normalizeData(value) {
    const predictValTensor = tf.tensor2d(value, [value.length, 1]);

    const normalizedPredictDataVal = predictValTensor.sub(inputMin).div(inputMax.sub(inputMin));

    return {
        normalizedPredictDataVal,
        inputMax,
        inputMin
    };
}

/**
 * Maps a normalized model output back to the label scale:
 * value * (max - min) + min.
 *
 * The model is trained on normalized labels, so the label statistics are the
 * ones to undo here. They are the defaults, which also covers callers that
 * pass `undefined` for the optional arguments.
 *
 * @param {tf.Tensor} value Normalized prediction.
 * @param {tf.Tensor} [predictMax=labelMax] Upper end of the target range.
 * @param {tf.Tensor} [predictMin=labelMin] Lower end of the target range.
 * @returns {tf.Tensor} The prediction on the original label scale.
 */
function unNormalizeData(value, predictMax = labelMax, predictMin = labelMin) {
    const unNormPredictVal = value.mul(predictMax.sub(predictMin)).add(predictMin);
    return unNormPredictVal;
}

/**
 * Entry point: builds and trains the model, then predicts for the inputs
 * 1000, 6 and 7, printing the normalized input, the raw model output and the
 * un-normalized result.
 *
 * @returns {Promise<void>} Resolves once the predictions have been printed.
 */
async function run() {
    const model = createModel();
    const data = createData();
    const { inputs, labels } = convertToTensor(data);

    await trainModel(model, inputs, labels);
    console.log('Training Complete');

    // normalizeData and model.predict are synchronous, so no await is needed.
    // Only normalizedPredictDataVal is taken from the result: the object
    // returned by normalizeData has no predictValMax / predictValMin keys.
    const { normalizedPredictDataVal } = normalizeData([1000, 6, 7]);
    console.log('normalizedinput');
    normalizedPredictDataVal.print();

    // print() logs the tensor itself and returns nothing, so it is not
    // wrapped in console.log (which would only add an `undefined` line).
    const output = model.predict(normalizedPredictDataVal);
    output.print();

    // unNormalizeData works from the module-level training statistics, so
    // only the prediction has to be passed.
    const finalOutput = unNormalizeData(output);
    finalOutput.print();
}

// Kick everything off once the DOMContentLoaded event fires.
document.addEventListener('DOMContentLoaded', run);
<!DOCTYPE html>
<!-- Minimal host page: it has no visible content and only loads the scripts below. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>Document</title>
</head>
<body>
    
    <!-- Scripts are listed in dependency order: TensorFlow.js first, then tfjs-vis, then the page script. -->
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@1.0.0/dist/tf.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-vis@1.0.2/dist/tfjs-vis.umd.min.js"></script>
    <!-- NOTE(review): index.js presumably holds the JavaScript shown above, which uses the `tf` and `tfvis` globals; confirm. -->
    <script src="index.js"></script>
</body>
</html>