A simple neural network produces large errors on untrained data no matter how much it is trained

Asked: 2014-04-07 07:10:33

Tags: java neural-network encog neuroscience bias-neuron

I made a small multiplication neural network with the Encog library, using a basic feed-forward network with sigmoid activation functions. My problem is that I get large errors on untrained data. How can I get better results on inputs the network was not trained on? Right now it is very error-prone.

First I tried tightening the stop condition from train.getError() > 0.00001 to train.getError() > 0.0000001, expecting that training down to a smaller error would give noticeably better results. But that did not help.

Adding more hidden layers did not help either: network.addLayer(new BasicLayer(new ActivationSigmoid(),false,128));

I also tried increasing the number of neurons per layer, but that did not help.

How can I get more accurate results?

What about bias neurons? When should they be used?

I have seen http://www.heatonresearch.com/wiki/Activation_Function, but I only use sigmoid. When should I use the other ones, or do I need to change my activation function?

Here is my code:

    package org.encog.examples.neural.xor;

    import org.encog.Encog;
    import org.encog.engine.network.activation.ActivationSigmoid;
    import org.encog.ml.data.MLData;
    import org.encog.ml.data.MLDataPair;
    import org.encog.ml.data.MLDataSet;
    import org.encog.ml.data.basic.BasicMLDataSet;
    import org.encog.neural.networks.BasicNetwork;
    import org.encog.neural.networks.layers.BasicLayer;
    import org.encog.neural.networks.training.propagation.resilient.ResilientPropagation;

    import java.awt.*;
    import java.text.DecimalFormat;
    import java.text.NumberFormat;


    public class MulHelloWorld {

        /**
         * The input necessary for MUL.
         */
        public static double MUL_INPUT[][] = {
                { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.2, 0.4 }, { 0.3, 0.2 },
                { 0.12, 0.11 }, { 0.7, 0.2 }, { 0.32, 0.42 }, { 0.9, 0.3 },
                { 0.5, 0.2 }, { 0.4, 0.6 }, { 0.9, 0.1 } };

        /**
         * The ideal data necessary for MUL.
         */
        public static double MUL_IDEAL[][] = {
                { 0.0 }, { 0.0 }, { 0.08 }, { 0.06 }, { 0.0132 }, { 0.14 },
                { 0.1344 }, { 0.27 }, { 0.1 }, { 0.24 }, { 0.09 } };


        private static BasicNetwork network;
        private static NumberFormat formatter = new DecimalFormat("###.#####");


        public static final void retrain() {
            network = new BasicNetwork();
            network.addLayer(new BasicLayer(null,true,2));
            network.addLayer(new BasicLayer(new ActivationSigmoid(),false,128));
            network.addLayer(new BasicLayer(new ActivationSigmoid(),false,128));
            network.addLayer(new BasicLayer(new ActivationSigmoid(),false,128));
            network.addLayer(new BasicLayer(new ActivationSigmoid(),false,1));
            network.getStructure().finalizeStructure();
            network.reset();

            // create training data
            MLDataSet trainingSet = new BasicMLDataSet(MUL_INPUT, MUL_IDEAL);

            // train the neural network
            final ResilientPropagation train = new ResilientPropagation(network, trainingSet );

            int epoch = 1;

            do {
                train.iteration();
                System.out.println("Epoch #" + epoch + " Error:" + formatter.format(train.getError()));
                epoch++;
            } while(train.getError() > 0.00001);
            train.finishTraining();

            // test the neural network
            System.out.println("Neural Network Results:");

            for(MLDataPair pair: trainingSet ) {
                final MLData output = network.compute(pair.getInput());
                System.out.println(pair.getInput().getData(0) + "," + pair.getInput().getData(1)
                        + ", actual=" + output.getData(0) + ",ideal=" + pair.getIdeal().getData(0));
            }
        }

        /**
         * The main method.
         * @param args No arguments are used.
         */
        public static void main(final String args[]) {
            // create a neural network, without using a factory

            retrain();

            final double computedValue = compute(network, 0.01, 0.01);
            final double diff = computedValue - 0.0001;
            do {
                if (diff < 0.001 && diff > -0.001) {
                    String f = formatter.format(computedValue);
                    System.out.println("0.0001:"+f);
                    System.out.println("0.0002:"+formatter.format(compute(network, 0.02, 0.01)));//0.0002
                    System.out.println("0.001:"+formatter.format(compute(network, 0.05, 0.02)));//0.001
                    Toolkit.getDefaultToolkit().beep();
                    try { Thread.sleep(7000); } catch (Exception epx) {}
                    retrain();
                } else {
                    String f = formatter.format(computedValue);
                    System.out.println("0.0001:"+f);
                    System.out.println("0.0002:"+formatter.format(compute(network, 0.02, 0.01)));//0.0002
                    System.out.println("0.001:"+formatter.format(compute(network, 0.05, 0.02)));//0.001
                    System.exit(0);
                }
            } while (diff < 0.001 && diff > -0.001);

            Encog.getInstance().shutdown();
        }


        public static final double compute(BasicNetwork network, double x, double y) {
            final double value[] = new double[1];
            network.compute( new double[] { x , y } , value );
            return value[0];
        }
    }

Here is my latest attempt, which seems to work better but still not well:

    package org.encog.examples.neural.xor;

    import org.encog.Encog;
    import org.encog.engine.network.activation.ActivationSigmoid;
    import org.encog.ml.data.MLData;
    import org.encog.ml.data.MLDataPair;
    import org.encog.ml.data.MLDataSet;
    import org.encog.ml.data.basic.BasicMLDataSet;
    import org.encog.neural.networks.BasicNetwork;
    import org.encog.neural.networks.layers.BasicLayer;
    import org.encog.neural.networks.training.propagation.resilient.ResilientPropagation;

    import java.awt.*;
    import java.text.DecimalFormat;
    import java.text.NumberFormat;
    import java.util.ArrayList;


    public class MulHelloWorld {

        /**
         * The input necessary for MUL.
         */
        public static double MUL_INPUT[][] = {
                { 0.0, 0.0 }, { 1.0, 0.0 }, { 0.2, 0.4 }, { 0.3, 0.2 },
                { 0.12, 0.11 }, { 0.7, 0.2 }, { 0.32, 0.42 }, { 0.9, 0.3 },
                { 0.5, 0.2 }, { 0.4, 0.6 }, { 0.9, 0.1 }, { 0.1, 0.1 },
                { 0.34, 0.42 }, { 0.3, 0.3 }
        };

        /**
         * The ideal data necessary for MUL.
         */
        public static double MUL_IDEAL[][] = {
                { 0.0 }, { 0.0 }, { 0.08 }, { 0.06 },
                { 0.0132 }, { 0.14 }, { 0.1344 }, { 0.27 },
                { 0.1 }, { 0.24 }, { 0.09 }, { 0.01 },
                { 0.1428 }, { 0.09 }
        };


        private static BasicNetwork network;
        private static NumberFormat formatter = new DecimalFormat("###.##########");
        private static final double acceptableDiff = 0.01;


        public static final void retrain() {
            network = new BasicNetwork();
            network.addLayer(new BasicLayer(null,true,2));
            network.addLayer(new BasicLayer(new ActivationSigmoid(),true,32));
            network.addLayer(new BasicLayer(new ActivationSigmoid(),true,32));
            network.addLayer(new BasicLayer(new ActivationSigmoid(),true,1));
            network.getStructure().finalizeStructure();
            network.reset();

            ArrayList<Double> inputs = new ArrayList<Double>();
            ArrayList<Double> inputs2 = new ArrayList<Double>();
            ArrayList<Double> outputs = new ArrayList<Double>();
            double j = 0;
            int size = 64;
            for (int i = 0; i < size; i++) {
                final double random1 = Math.random();
                final double random2 = Math.random();
                inputs.add( random1 );
                inputs2.add( random2 );
                outputs.add( random1*random2 );
            }
            final Double x1[] = new Double[size];
            final Double x2[] = new Double[size];
            final Double x3[] = new Double[size];

            final Double[] inputz1 = inputs.toArray(x1);
            final Double[] inputz2 = inputs2.toArray(x2);
            final Double[] outz = outputs.toArray(x3);

            final double inputsAll[][] = new double[inputz1.length][2];
            final double outputsAll[][] = new double[inputz1.length][1];

            final int inputz1Size = inputz1.length;
            for (int x = 0; x < inputz1Size ; x++) {
                inputsAll[x][0] = inputz1[x];
                inputsAll[x][1] = inputz2[x];

                outputsAll[x][0] = outz[x];
            }

            // create training data
            MLDataSet trainingSet = new BasicMLDataSet(inputsAll, outputsAll );

            // train the neural network
            final ResilientPropagation train = new ResilientPropagation(network, trainingSet );

            int epoch = 1;
            do {
                train.iteration();
                System.out.println("Epoch #" + epoch + " Error:" + formatter.format(train.getError()));
                epoch++;
            } while(train.getError() > acceptableDiff);
            train.finishTraining();

            // test the neural network
            System.out.println("Neural Network Results:");

            for(MLDataPair pair: trainingSet ) {
                final MLData output = network.compute(pair.getInput());
                System.out.println(pair.getInput().getData(0) + "," + pair.getInput().getData(1)
                        + ", actual=" + output.getData(0) + ",ideal=" + pair.getIdeal().getData(0));
            }
        }

        /**
         * The main method.
         * @param args No arguments are used.
         */
        public static void main(final String args[]) {
            // create a neural network, without using a factory

            retrain();


            double random3 = Math.random();
            double random4 = Math.random();
            double v2 = random3 * random4;
            double computedValue = compute(network, random3, random4);
            System.out.println(formatter.format(v2) + ":" + formatter.format(computedValue));

            final double diff = computedValue - v2;
            do {
                if (diff <  acceptableDiff || diff > -acceptableDiff ) {
                    String f = formatter.format(computedValue);
                    {
                        double random = Math.random();
                        double random1 = Math.random();
                        double v = random * random1;
                        System.out.println(formatter.format(v) + ":" + formatter.format(compute(network, random, random1)));
                    }

                    {
                        double random = Math.random();
                        double random1 = Math.random();
                        double v = random * random1;
                        System.out.println(formatter.format(v) + ":" + formatter.format(compute(network, random, random1)));
                    }

                    {
                        double random = Math.random();
                        double random1 = Math.random();
                        double v = random * random1;
                        System.out.println(formatter.format(v) + ":" + formatter.format(compute(network, random, random1)));
                    }

                    Toolkit.getDefaultToolkit().beep();
                    try { Thread.sleep(1000); } catch (Exception epx) {}
                    retrain();
                } else {
                    String f = formatter.format(computedValue);
                    System.out.println("0.0001:"+f);
                    System.out.println("0.0002:"+formatter.format(compute(network, 0.02, 0.01)));//0.0002
                    System.out.println("0.001:"+formatter.format(compute(network, 0.05, 0.02)));//0.001
                    System.exit(0);
                }
            } while (diff < acceptableDiff || diff > -acceptableDiff);

            Encog.getInstance().shutdown();
        }


        public static final double compute(BasicNetwork network, double x, double y) {
            final double value[] = new double[1];
            network.compute( new double[] { x , y } , value );
            return value[0];
        }
    }

1 Answer:

Answer 0 (score: 0):

You may find that you are actually fitting the training set too closely, so your network does not generalize well. A better strategy is to hold out a third set of data for validation. You can use that data to decide what training error actually produces valid results, and then test the network on untrained data.
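As a rough sketch of that strategy with Encog (assuming Encog 3.x; the 80/20 split, the patience of 50 epochs, and the helper names below are illustrative choices, not something from the question):

    import java.util.Arrays;

    import org.encog.ml.data.MLDataSet;
    import org.encog.ml.data.basic.BasicMLDataSet;
    import org.encog.neural.networks.BasicNetwork;
    import org.encog.neural.networks.training.propagation.resilient.ResilientPropagation;

    public final class EarlyStoppingSketch {

        /**
         * Trains until the error on a held-out validation set has not
         * improved for `patience` consecutive epochs (early stopping).
         */
        public static void trainWithValidation(BasicNetwork network,
                double[][] inputs, double[][] ideals) {
            // Hold out the last 20% of the samples for validation.
            final int split = (int) (inputs.length * 0.8);
            final MLDataSet trainingSet = new BasicMLDataSet(
                    Arrays.copyOfRange(inputs, 0, split),
                    Arrays.copyOfRange(ideals, 0, split));
            final MLDataSet validationSet = new BasicMLDataSet(
                    Arrays.copyOfRange(inputs, split, inputs.length),
                    Arrays.copyOfRange(ideals, split, ideals.length));

            final ResilientPropagation train =
                    new ResilientPropagation(network, trainingSet);
            final int patience = 50; // illustrative; tune for your problem
            double bestValidationError = Double.MAX_VALUE;
            int epochsWithoutImprovement = 0;
            int epoch = 1;

            while (epochsWithoutImprovement < patience && epoch <= 10000) {
                train.iteration();
                // Measure error on data the trainer never sees.
                final double validationError = network.calculateError(validationSet);
                System.out.println("Epoch #" + epoch
                        + " train=" + train.getError()
                        + " validation=" + validationError);
                if (validationError < bestValidationError) {
                    bestValidationError = validationError;
                    epochsWithoutImprovement = 0;
                } else {
                    epochsWithoutImprovement++;
                }
                epoch++;
            }
            train.finishTraining();
        }
    }

The point is that the stopping decision uses error on data the trainer never saw, so training halts before the network starts memorizing the training points.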

I'm not familiar with this particular package, but you may also want to look at other training methods. I have found that scaled conjugate gradient usually does somewhat better than basic backpropagation.
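Encog does appear to ship a scaled conjugate gradient trainer, which should be close to a drop-in replacement for ResilientPropagation in the question's training loop. A minimal sketch, assuming Encog 3.x and the network and trainingSet already built as in the question:

    import org.encog.neural.networks.training.propagation.scg.ScaledConjugateGradient;

    // Swap the trainer; the rest of the training loop is unchanged.
    final ScaledConjugateGradient train =
            new ScaledConjugateGradient(network, trainingSet);
    do {
        train.iteration();
        System.out.println("Error: " + train.getError());
    } while (train.getError() > 0.0001);
    train.finishTraining();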