我正在尝试使用来自JSON文件的一些数据来训练神经网络,当我将trainData数组推入0到1之间的200个数字的数组时,我会预加载并进行规范化,但是一旦我开始训练网络,就会出错每次迭代都为NaN。
var brain = require('brain.js');
var fs = require('fs');
const PDFExtract = require('pdf.js-extract').PDFExtract;
const pdfExtract = new PDFExtract();
var MAX = 200;
var structured_data = JSON.parse(fs.readFileSync("./data/test-company/structured_date.json"));
function normalize(string) {
var input = [];
for (let i = 0; i < string.length; i++) {
input.push(string.charCodeAt(i) / 1000);
}
for (let i = string.length; i < MAX; i++) {
input.push(0);
}
return input;
}
function convert_ascii(ascii) {
var string = '';
for (let i = 0; i < ascii.length; i++) {
string += String.fromCharCode(ascii[i] * 1000);
}
return string;
}
var trainData = [];
for(let i=0;i<structured_data.length;i++){
trainData.push({
input:{ value:normalize(structured_data[i].title.value), x:normalize(structured_data[i].title.x.toString()), y : normalize(structured_data[i].title.y.toString())},
output : {title : 1}
});
trainData.push({
input:{ value:normalize(structured_data[i].invoice_number.value), x:normalize(structured_data[i].invoice_number.x.toString()), y : normalize(structured_data[i].invoice_number.y.toString())},
output : {invoice_number : 1}
});
trainData.push({
input:{ value:normalize(structured_data[i].invoice_date.value), x:normalize(structured_data[i].invoice_date.x.toString()), y : normalize(structured_data[i].invoice_date.y.toString())},
output : {invoice_date : 1}
});
trainData.push({
input:{ value:normalize(structured_data[i].invoice_due_date.value), x:normalize(structured_data[i].invoice_due_date.x.toString()), y : normalize(structured_data[i].invoice_due_date.y.toString())},
output : {invoice_due_date : 1}
});
trainData.push({
input:{ value:normalize(structured_data[i].total_due.value), x:normalize(structured_data[i].total_due.x.toString()), y : normalize(structured_data[i].total_due.y.toString())},
output : {total_due : 1}
});
trainData.push({
input:{ value:normalize(structured_data[i].sub_total.value), x:normalize(structured_data[i].sub_total.x.toString()), y : normalize(structured_data[i].sub_total.y.toString())},
output : {sub_total : 1}
});
trainData.push({
input:{ value:normalize(structured_data[i].tax.value), x:normalize(structured_data[i].tax.x.toString()), y : normalize(structured_data[i].tax.y.toString())},
output : {tax : 1}
});
}
console.log(trainData);
setTimeout(function(){
const net = new brain.NeuralNetwork();
net.train(trainData,
{
log: detail => console.log(detail),
iterations: 200
});
fs.writeFileSync('./net.json', JSON.stringify(net.toJSON()));
}, 2000);
这是火车数据的结构,如果我通过控制台对其进行记录,为了简化起见,我将其削减为我拥有的许多输入输出对中的3个。
[ { input: { value: [Array], x: [Array], y: [Array] },
output: { title: 1 } },
{ input: { value: [Array], x: [Array], y: [Array] },
output: { title: 1 } },
{ input: { value: [Array], x: [Array], y: [Array] },
output: { title: 1 } } ]
每个数组都具有以下结构,一个数字数组,介于0和1之间,每个数组的固定长度为100:
[ 0.073,
0.11,
0.118,
0.111,
0.105,
0.099,
0.101,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0 ]