我使用 Encog 库构建了一个带 sigmoid 激活函数的基础前馈网络，用来学习小数乘法。我的问题是：网络在未参与训练的数据上误差很大。如何让网络在未训练数据上得到更好、更少出错的结果？
首先我尝试把训练停止条件从 train.getError() > 0.00001 收紧到 train.getError() > 0.0000001，希望更小的训练误差能带来更准确的结果，但这并没有帮助。
增加隐藏层也没有帮助:network.addLayer(new BasicLayer(new ActivationSigmoid(),false,128));
我试图增加每层的神经元数,但也没有帮助
我如何获得更清晰的结果?
有什么偏见吗?何时使用?
我看过 http://www.heatonresearch.com/wiki/Activation_Function ，但我只用过 sigmoid。什么时候该使用其他激活函数？我是否需要更换激活函数？
这是我的代码:
package org.encog.examples.neural.xor;
import org.encog.Encog;
import org.encog.engine.network.activation.ActivationSigmoid;
import org.encog.ml.data.MLData;
import org.encog.ml.data.MLDataPair;
import org.encog.ml.data.MLDataSet;
import org.encog.ml.data.basic.BasicMLDataSet;
import org.encog.neural.networks.BasicNetwork;
import org.encog.neural.networks.layers.BasicLayer;
import org.encog.neural.networks.training.propagation.resilient.ResilientPropagation;
import java.awt.*;
import java.text.DecimalFormat;
import java.text.NumberFormat;
public class MulHelloWorld {

	/**
	 * Training inputs for the multiplication network: pairs of factors in [0, 1].
	 */
	public static double MUL_INPUT[][] = { { 0.0, 0.0 }, { 1.0, 0.0 },
			{ 0.2, 0.4 }, { 0.3, 0.2 }, { 0.12, 0.11 }, { 0.7, 0.2 }, { 0.32, 0.42 },
			{ 0.9, 0.3 }, { 0.5, 0.2 }, { 0.4, 0.6 }, { 0.9, 0.1 } };

	/**
	 * Ideal outputs: the product of each corresponding input pair.
	 */
	public static double MUL_IDEAL[][] = { { 0.0 }, { 0.0 }, { 0.08 }, { 0.06 },
			{ 0.0132 }, { 0.14 }, { 0.1344 }, { 0.27 }, { 0.1 }, { 0.24 }, { 0.09 } };

	private static BasicNetwork network;

	private static NumberFormat formatter = new DecimalFormat("###.#####");

	/**
	 * Builds a fresh 2-128-128-128-1 sigmoid network and trains it with
	 * resilient propagation until the training error drops below 1e-5,
	 * printing the per-epoch error and then the results on the training set.
	 */
	public static final void retrain() {
		network = new BasicNetwork();
		// Only the input layer carries a bias neuron here.
		// NOTE(review): hidden layers normally benefit from bias too — confirm intent.
		network.addLayer(new BasicLayer(null, true, 2));
		network.addLayer(new BasicLayer(new ActivationSigmoid(), false, 128));
		network.addLayer(new BasicLayer(new ActivationSigmoid(), false, 128));
		network.addLayer(new BasicLayer(new ActivationSigmoid(), false, 128));
		network.addLayer(new BasicLayer(new ActivationSigmoid(), false, 1));
		network.getStructure().finalizeStructure();
		network.reset(); // randomize the weights

		// create training data
		MLDataSet trainingSet = new BasicMLDataSet(MUL_INPUT, MUL_IDEAL);

		// train the neural network
		final ResilientPropagation train = new ResilientPropagation(network, trainingSet);
		int epoch = 1;
		do {
			train.iteration();
			System.out.println("Epoch #" + epoch + " Error:" + formatter.format(train.getError()));
			epoch++;
		} while (train.getError() > 0.00001);
		train.finishTraining();

		// test the neural network — note this re-uses the TRAINING data
		System.out.println("Neural Network Results:");
		for (MLDataPair pair : trainingSet) {
			final MLData output = network.compute(pair.getInput());
			System.out.println(pair.getInput().getData(0) + "," + pair.getInput().getData(1)
					+ ", actual=" + output.getData(0) + ",ideal=" + pair.getIdeal().getData(0));
		}
	}

	/**
	 * Trains, then keeps retraining while the answer for 0.01 * 0.01 stays
	 * within 0.001 of the true product (0.0001); once it falls outside the
	 * tolerance, prints the spot checks once more and shuts Encog down.
	 *
	 * <p>Fix over the original: {@code computedValue} and {@code diff} were
	 * {@code final} locals computed once before the loop, so the loop
	 * condition never changed — the program either exited immediately or
	 * retrained forever without re-checking the freshly trained network.
	 * They are now recomputed after every {@code retrain()}. The original's
	 * {@code System.exit(0)} also made {@code Encog.getInstance().shutdown()}
	 * unreachable; the loop now falls through to it instead.
	 *
	 * @param args No arguments are used.
	 */
	public static void main(final String args[]) {
		// create a neural network, without using a factory
		retrain();
		double computedValue = compute(network, 0.01, 0.01);
		double diff = computedValue - 0.0001;
		while (diff < 0.001 && diff > -0.001) {
			System.out.println("0.0001:" + formatter.format(computedValue));
			System.out.println("0.0002:" + formatter.format(compute(network, 0.02, 0.01)));// 0.0002
			System.out.println("0.001:" + formatter.format(compute(network, 0.05, 0.02)));// 0.001
			Toolkit.getDefaultToolkit().beep();
			try {
				Thread.sleep(7000);
			} catch (Exception ignored) {
				// best-effort pause between rounds; interruption is not fatal here
			}
			retrain();
			// Re-evaluate against the newly trained network so the loop can end.
			computedValue = compute(network, 0.01, 0.01);
			diff = computedValue - 0.0001;
		}
		System.out.println("0.0001:" + formatter.format(computedValue));
		System.out.println("0.0002:" + formatter.format(compute(network, 0.02, 0.01)));// 0.0002
		System.out.println("0.001:" + formatter.format(compute(network, 0.05, 0.02)));// 0.001
		Encog.getInstance().shutdown();
	}

	/**
	 * Feeds the pair (x, y) through the given network and returns its
	 * single output value.
	 */
	public static final double compute(BasicNetwork network, double x, double y) {
		final double value[] = new double[1];
		network.compute(new double[] { x, y }, value);
		return value[0];
	}
}
下面是我最后一次的尝试，效果似乎好一些，但仍然不理想：
package org.encog.examples.neural.xor;
import org.encog.Encog;
import org.encog.engine.network.activation.ActivationSigmoid;
import org.encog.ml.data.MLData;
import org.encog.ml.data.MLDataPair;
import org.encog.ml.data.MLDataSet;
import org.encog.ml.data.basic.BasicMLDataSet;
import org.encog.neural.networks.BasicNetwork;
import org.encog.neural.networks.layers.BasicLayer;
import org.encog.neural.networks.training.propagation.resilient.ResilientPropagation;
import java.awt.*;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
public class MulHelloWorld {

	/**
	 * Hand-picked sample factor pairs. Kept for reference — {@link #retrain()}
	 * now trains on freshly generated random samples instead.
	 */
	public static double MUL_INPUT[][] = {
			{ 0.0, 0.0 }, { 1.0, 0.0 }, { 0.2, 0.4 }, { 0.3, 0.2 },
			{ 0.12, 0.11 }, { 0.7, 0.2 }, { 0.32, 0.42 }, { 0.9, 0.3 },
			{ 0.5, 0.2 }, { 0.4, 0.6 }, { 0.9, 0.1 }, { 0.1, 0.1 },
			{ 0.34, 0.42 }, { 0.3, 0.3 }
	};

	/**
	 * Products of the sample pairs above.
	 */
	public static double MUL_IDEAL[][] = {
			{ 0.0 }, { 0.0 }, { 0.08 }, { 0.06 },
			{ 0.0132 }, { 0.14 }, { 0.1344 }, { 0.27 },
			{ 0.1 }, { 0.24 }, { 0.09 }, { 0.01 },
			{ 0.1428 }, { 0.09 } };

	private static BasicNetwork network;

	private static NumberFormat formatter = new DecimalFormat("###.##########");

	/** Absolute tolerance: training stop error and spot-check threshold. */
	private static final double acceptableDiff = 0.01;

	/**
	 * Builds a fresh 2-32-32-1 sigmoid network (all layers with bias) and
	 * trains it on 64 freshly generated random multiplication samples with
	 * resilient propagation until the error drops below
	 * {@link #acceptableDiff}, printing per-epoch error and the results on
	 * the training data.
	 */
	public static final void retrain() {
		network = new BasicNetwork();
		network.addLayer(new BasicLayer(null, true, 2));
		network.addLayer(new BasicLayer(new ActivationSigmoid(), true, 32));
		network.addLayer(new BasicLayer(new ActivationSigmoid(), true, 32));
		network.addLayer(new BasicLayer(new ActivationSigmoid(), true, 1));
		network.getStructure().finalizeStructure();
		network.reset(); // randomize the weights

		// Generate the random training samples straight into primitive arrays.
		// (The original detoured through three ArrayList<Double> instances,
		// Double[] copies, and an unused local `j` — all removed.)
		final int size = 64;
		final double inputsAll[][] = new double[size][2];
		final double outputsAll[][] = new double[size][1];
		for (int i = 0; i < size; i++) {
			final double a = Math.random();
			final double b = Math.random();
			inputsAll[i][0] = a;
			inputsAll[i][1] = b;
			outputsAll[i][0] = a * b;
		}

		// create training data
		MLDataSet trainingSet = new BasicMLDataSet(inputsAll, outputsAll);

		// train the neural network
		final ResilientPropagation train = new ResilientPropagation(network, trainingSet);
		int epoch = 1;
		do {
			train.iteration();
			System.out.println("Epoch #" + epoch + " Error:" + formatter.format(train.getError()));
			epoch++;
		} while (train.getError() > acceptableDiff);
		train.finishTraining();

		// test the neural network — note this re-uses the TRAINING data
		System.out.println("Neural Network Results:");
		for (MLDataPair pair : trainingSet) {
			final MLData output = network.compute(pair.getInput());
			System.out.println(pair.getInput().getData(0) + "," + pair.getInput().getData(1)
					+ ", actual=" + output.getData(0) + ",ideal=" + pair.getIdeal().getData(0));
		}
	}

	/** Prints "expected:actual" for one random multiplication spot check. */
	private static void spotCheck() {
		final double a = Math.random();
		final double b = Math.random();
		System.out.println(formatter.format(a * b) + ":"
				+ formatter.format(compute(network, a, b)));
	}

	/**
	 * Trains, then keeps retraining while a random spot check still lands
	 * within {@link #acceptableDiff} of the true product; once a check falls
	 * outside the tolerance, prints a few fixed probes and shuts down.
	 *
	 * <p>Fixes over the original: the tolerance test used {@code ||}
	 * ({@code diff < 0.01 || diff > -0.01}), which is true for EVERY real
	 * number — any value is below 0.01 or above -0.01 — so the exit branch
	 * was only reachable by accident of the first branch's side effects;
	 * and {@code diff} was a {@code final} local computed once, so the loop
	 * condition never changed. The test is now {@code &&} (i.e. "within
	 * tolerance") and recomputed every iteration. The original's
	 * {@code System.exit(0)} also made {@code Encog.getInstance().shutdown()}
	 * unreachable; the loop now falls through to it instead.
	 *
	 * @param args No arguments are used.
	 */
	public static void main(final String args[]) {
		// create a neural network, without using a factory
		retrain();
		double computedValue;
		double diff;
		do {
			final double a = Math.random();
			final double b = Math.random();
			final double expected = a * b;
			computedValue = compute(network, a, b);
			System.out.println(formatter.format(expected) + ":" + formatter.format(computedValue));
			diff = computedValue - expected;
			if (diff < acceptableDiff && diff > -acceptableDiff) {
				// Still (suspiciously) accurate: show a few more random checks,
				// beep, pause, and retrain from scratch.
				spotCheck();
				spotCheck();
				spotCheck();
				Toolkit.getDefaultToolkit().beep();
				try {
					Thread.sleep(1000);
				} catch (Exception ignored) {
					// best-effort pause between retraining rounds
				}
				retrain();
			}
		} while (diff < acceptableDiff && diff > -acceptableDiff);
		System.out.println("0.0001:" + formatter.format(computedValue));
		System.out.println("0.0002:" + formatter.format(compute(network, 0.02, 0.01)));// 0.0002
		System.out.println("0.001:" + formatter.format(compute(network, 0.05, 0.02)));// 0.001
		Encog.getInstance().shutdown();
	}

	/**
	 * Feeds the pair (x, y) through the given network and returns its
	 * single output value.
	 */
	public static final double compute(BasicNetwork network, double x, double y) {
		final double value[] = new double[1];
		network.compute(new double[] { x, y }, value);
		return value[0];
	}
}
答案 0（得分：0）：
你很可能是对训练集过拟合了，因此网络泛化能力不好。更好的做法是另外准备一个验证集：用它来确定合理的训练误差停止阈值，然后再在未训练过的数据上测试网络。
我不熟悉这个特定的库，但你也可以试试其他训练算法。我发现缩放共轭梯度（scaled conjugate gradient）通常比基本的反向传播效果好一些。