Brain.js NaN训练错误,数据已标准化

时间:2019-04-19 16:51:12

标签: javascript node.js machine-learning artificial-intelligence brain.js

我正在尝试使用来自JSON文件的一些数据来训练神经网络,当我将trainData数组推入0到1之间的200个数字的数组时,我会预加载并进行规范化,但是一旦我开始训练网络,就会出错每次迭代都为NaN。

var brain = require('brain.js');
var fs = require('fs');
const PDFExtract = require('pdf.js-extract').PDFExtract;
const pdfExtract = new PDFExtract();

var MAX = 200;
var structured_data = JSON.parse(fs.readFileSync("./data/test-company/structured_date.json"));

function normalize(string) {
    var input = [];
    for (let i = 0; i < string.length; i++) {
        input.push(string.charCodeAt(i) / 1000);
    }
    for (let i = string.length; i < MAX; i++) {
        input.push(0);
    }
    return input;
}


function convert_ascii(ascii) {
    var string = '';
    for (let i = 0; i < ascii.length; i++) {
        string += String.fromCharCode(ascii[i] * 1000);
    }
    return string;
}

var trainData = [];
for(let i=0;i<structured_data.length;i++){
    trainData.push({
        input:{ value:normalize(structured_data[i].title.value), x:normalize(structured_data[i].title.x.toString()), y : normalize(structured_data[i].title.y.toString())},
        output : {title : 1}
    });
    trainData.push({
        input:{ value:normalize(structured_data[i].invoice_number.value), x:normalize(structured_data[i].invoice_number.x.toString()), y : normalize(structured_data[i].invoice_number.y.toString())},
        output : {invoice_number : 1}
    });
    trainData.push({
        input:{ value:normalize(structured_data[i].invoice_date.value), x:normalize(structured_data[i].invoice_date.x.toString()), y : normalize(structured_data[i].invoice_date.y.toString())},
        output : {invoice_date : 1}
    });
    trainData.push({
        input:{ value:normalize(structured_data[i].invoice_due_date.value), x:normalize(structured_data[i].invoice_due_date.x.toString()), y : normalize(structured_data[i].invoice_due_date.y.toString())},
        output : {invoice_due_date : 1}
    });
    trainData.push({
        input:{ value:normalize(structured_data[i].total_due.value), x:normalize(structured_data[i].total_due.x.toString()), y : normalize(structured_data[i].total_due.y.toString())},
        output : {total_due : 1}
    });
    trainData.push({
        input:{ value:normalize(structured_data[i].sub_total.value), x:normalize(structured_data[i].sub_total.x.toString()), y : normalize(structured_data[i].sub_total.y.toString())},
        output : {sub_total : 1}
    });
    trainData.push({
        input:{ value:normalize(structured_data[i].tax.value), x:normalize(structured_data[i].tax.x.toString()), y : normalize(structured_data[i].tax.y.toString())},
        output : {tax : 1}
    });
}

console.log(trainData);

setTimeout(function(){
    const net = new brain.NeuralNetwork();
net.train(trainData,
    {
    log: detail => console.log(detail),
    iterations: 200
});


fs.writeFileSync('./net.json', JSON.stringify(net.toJSON()));
}, 2000);

这是火车数据的结构,如果我通过控制台对其进行记录,为了简化起见,我将其削减为我拥有的许多输入输出对中的3个。

[ { input: { value: [Array], x: [Array], y: [Array] },
    output: { title: 1 } },
  { input: { value: [Array], x: [Array], y: [Array] },
    output: { title: 1 } },
  { input: { value: [Array], x: [Array], y: [Array] },
    output: { title: 1 } } ]

每个数组都具有以下结构,一个数字数组,介于0和1之间,每个数组的固定长度为100:

[ 0.073,
  0.11,
  0.118,
  0.111,
  0.105,
  0.099,
  0.101,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0 ]

0 个答案:

没有答案