2014年1月6日更新:我已经更新了问题,以便我尝试解决非线性方程。正如你们许多人所指出的那样,我不需要额外的复杂性(隐藏层,sigmoid函数等)来解决非线性问题。
另外,我意识到除了神经网络之外,我甚至可以使用其他方法来解决这样的非线性问题。我不是要编写最有效的代码或最少量的代码。这纯粹是为了让我更好地学习神经网络。
我已经创建了自己的反向传播神经网络实现。
在训练解决简单的XOR操作时,它工作正常。
但现在我想改编它&训练它解决Y = X * X + B型公式,但我没有得到预期的结果。训练后,网络无法计算出正确的答案。神经网络是否非常适合解决这样的代数方程?我意识到我的例子是微不足道的,我只是想了解更多有关神经网络及其功能的信息。
我的隐藏层正在使用sigmoid激活功能,而我的输出层正在使用身份功能。
如果您可以分析我的代码并指出任何错误,我将不胜感激。
这是我的完整代码(C#.NET):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace NeuralNetwork
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Training Network...");
Random r = new Random();
var network = new NeuralNetwork(1, 5, 1);
for (int i = 0; i < 100000; i++)
{
int x = i % 15;
int y = x * x + 10;
network.Train(x);
network.BackPropagate(y);
}
//Below should output 20, but instead outputs garbage
Console.WriteLine("0 * 0 + 10 = " + network.Compute(0)[0]);
//Below should output 110, but instead outputs garbage
Console.WriteLine("10 * 10 + 10 = " + network.Compute(10)[0]);
//Below should output 410, but instead outputs garbage
Console.WriteLine("20 * 20 + 10 = " + network.Compute(20)[0]);
}
}
public class NeuralNetwork
{
public double LearnRate { get; set; }
public double Momentum { get; set; }
public List<Neuron> InputLayer { get; set; }
public List<Neuron> HiddenLayer { get; set; }
public List<Neuron> OutputLayer { get; set; }
static Random random = new Random();
public NeuralNetwork(int inputSize, int hiddenSize, int outputSize)
{
LearnRate = .9;
Momentum = .04;
InputLayer = new List<Neuron>();
HiddenLayer = new List<Neuron>();
OutputLayer = new List<Neuron>();
for (int i = 0; i < inputSize; i++)
InputLayer.Add(new Neuron());
for (int i = 0; i < hiddenSize; i++)
HiddenLayer.Add(new Neuron(InputLayer));
for (int i = 0; i < outputSize; i++)
OutputLayer.Add(new Neuron(HiddenLayer));
}
public void Train(params double[] inputs)
{
int i = 0;
InputLayer.ForEach(a => a.Value = inputs[i++]);
HiddenLayer.ForEach(a => a.CalculateValue());
OutputLayer.ForEach(a => a.CalculateValue());
}
public double[] Compute(params double[] inputs)
{
Train(inputs);
return OutputLayer.Select(a => a.Value).ToArray();
}
public double CalculateError(params double[] targets)
{
int i = 0;
return OutputLayer.Sum(a => Math.Abs(a.CalculateError(targets[i++])));
}
public void BackPropagate(params double[] targets)
{
int i = 0;
OutputLayer.ForEach(a => a.CalculateGradient(targets[i++]));
HiddenLayer.ForEach(a => a.CalculateGradient());
HiddenLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
OutputLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
}
public static double NextRandom()
{
return 2 * random.NextDouble() - 1;
}
public static double SigmoidFunction(double x)
{
if (x < -45.0) return 0.0;
else if (x > 45.0) return 1.0;
return 1.0 / (1.0 + Math.Exp(-x));
}
public static double SigmoidDerivative(double f)
{
return f * (1 - f);
}
public static double HyperTanFunction(double x)
{
if (x < -10.0) return -1.0;
else if (x > 10.0) return 1.0;
else return Math.Tanh(x);
}
public static double HyperTanDerivative(double f)
{
return (1 - f) * (1 + f);
}
public static double IdentityFunction(double x)
{
return x;
}
public static double IdentityDerivative()
{
return 1;
}
}
public class Neuron
{
public bool IsInput { get { return InputSynapses.Count == 0; } }
public bool IsHidden { get { return InputSynapses.Count != 0 && OutputSynapses.Count != 0; } }
public bool IsOutput { get { return OutputSynapses.Count == 0; } }
public List<Synapse> InputSynapses { get; set; }
public List<Synapse> OutputSynapses { get; set; }
public double Bias { get; set; }
public double BiasDelta { get; set; }
public double Gradient { get; set; }
public double Value { get; set; }
public Neuron()
{
InputSynapses = new List<Synapse>();
OutputSynapses = new List<Synapse>();
Bias = NeuralNetwork.NextRandom();
}
public Neuron(List<Neuron> inputNeurons) : this()
{
foreach (var inputNeuron in inputNeurons)
{
var synapse = new Synapse(inputNeuron, this);
inputNeuron.OutputSynapses.Add(synapse);
InputSynapses.Add(synapse);
}
}
public virtual double CalculateValue()
{
var d = InputSynapses.Sum(a => a.Weight * a.InputNeuron.Value) + Bias;
return Value = IsHidden ? NeuralNetwork.SigmoidFunction(d) : NeuralNetwork.IdentityFunction(d);
}
public virtual double CalculateDerivative()
{
var d = Value;
return IsHidden ? NeuralNetwork.SigmoidDerivative(d) : NeuralNetwork.IdentityDerivative();
}
public double CalculateError(double target)
{
return target - Value;
}
public double CalculateGradient(double target)
{
return Gradient = CalculateError(target) * CalculateDerivative();
}
public double CalculateGradient()
{
return Gradient = OutputSynapses.Sum(a => a.OutputNeuron.Gradient * a.Weight) * CalculateDerivative();
}
public void UpdateWeights(double learnRate, double momentum)
{
var prevDelta = BiasDelta;
BiasDelta = learnRate * Gradient; // * 1
Bias += BiasDelta + momentum * prevDelta;
foreach (var s in InputSynapses)
{
prevDelta = s.WeightDelta;
s.WeightDelta = learnRate * Gradient * s.InputNeuron.Value;
s.Weight += s.WeightDelta + momentum * prevDelta;
}
}
}
public class Synapse
{
public Neuron InputNeuron { get; set; }
public Neuron OutputNeuron { get; set; }
public double Weight { get; set; }
public double WeightDelta { get; set; }
public Synapse(Neuron inputNeuron, Neuron outputNeuron)
{
InputNeuron = inputNeuron;
OutputNeuron = outputNeuron;
Weight = NeuralNetwork.NextRandom();
}
}
}
答案 0 :(得分:1)
你使用sigmoid作为输出函数,在[0-1]范围内 但你的目标值是范围的两倍[0 - MAX_INT],我认为这是你获得NAN的基本原则。 我更新你的代码,并尝试规范化[0-1]范围内的值, 我可以得到这样的ouptut,这是我所期待的
我认为我已接近事实真相,我不确定为什么这个答案会被拒绝
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace NeuralNetwork
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Training Network...");
Random r = new Random();
var network = new NeuralNetwork(1, 3, 1);
for (int k = 0; k < 60; k++)
{
for (int i = 0; i < 1000; i++)
{
double x = i / 1000.0;// r.Next();
double y = 3 * x;
network.Train(x);
network.BackPropagate(y);
}
double output = network.Compute(0.2)[0];
Console.WriteLine(output);
}
//Below should output 10, but instead outputs either a very large number or NaN
/* double output = network.Compute(3)[0];
Console.WriteLine(output);*/
}
}
public class NeuralNetwork
{
public double LearnRate { get; set; }
public double Momentum { get; set; }
public List<Neuron> InputLayer { get; set; }
public List<Neuron> HiddenLayer { get; set; }
public List<Neuron> OutputLayer { get; set; }
static Random random = new Random();
public NeuralNetwork(int inputSize, int hiddenSize, int outputSize)
{
LearnRate = .2;
Momentum = .04;
InputLayer = new List<Neuron>();
HiddenLayer = new List<Neuron>();
OutputLayer = new List<Neuron>();
for (int i = 0; i < inputSize; i++)
InputLayer.Add(new Neuron());
for (int i = 0; i < hiddenSize; i++)
HiddenLayer.Add(new Neuron(InputLayer));
for (int i = 0; i < outputSize; i++)
OutputLayer.Add(new Neuron(HiddenLayer));
}
public void Train(params double[] inputs)
{
int i = 0;
InputLayer.ForEach(a => a.Value = inputs[i++]);
HiddenLayer.ForEach(a => a.CalculateValue());
OutputLayer.ForEach(a => a.CalculateValue());
}
public double[] Compute(params double[] inputs)
{
Train(inputs);
return OutputLayer.Select(a => a.Value).ToArray();
}
public double CalculateError(params double[] targets)
{
int i = 0;
return OutputLayer.Sum(a => Math.Abs(a.CalculateError(targets[i++])));
}
public void BackPropagate(params double[] targets)
{
int i = 0;
OutputLayer.ForEach(a => a.CalculateGradient(targets[i++]));
HiddenLayer.ForEach(a => a.CalculateGradient());
HiddenLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
OutputLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
}
public static double NextRandom()
{
return 2 * random.NextDouble() - 1;
}
public static double SigmoidFunction(double x)
{
if (x < -45.0)
{
return 0.0;
}
else if (x > 45.0)
{
return 1.0;
}
return 1.0 / (1.0 + Math.Exp(-x));
}
public static double SigmoidDerivative(double f)
{
return f * (1 - f);
}
public static double HyperTanFunction(double x)
{
if (x < -10.0) return -1.0;
else if (x > 10.0) return 1.0;
else return Math.Tanh(x);
}
public static double HyperTanDerivative(double f)
{
return (1 - f) * (1 + f);
}
public static double IdentityFunction(double x)
{
return x;
}
public static double IdentityDerivative()
{
return 1;
}
}
public class Neuron
{
public bool IsInput { get { return InputSynapses.Count == 0; } }
public bool IsHidden { get { return InputSynapses.Count != 0 && OutputSynapses.Count != 0; } }
public bool IsOutput { get { return OutputSynapses.Count == 0; } }
public List<Synapse> InputSynapses { get; set; }
public List<Synapse> OutputSynapses { get; set; }
public double Bias { get; set; }
public double BiasDelta { get; set; }
public double Gradient { get; set; }
public double Value { get; set; }
public Neuron()
{
InputSynapses = new List<Synapse>();
OutputSynapses = new List<Synapse>();
Bias = NeuralNetwork.NextRandom();
}
public Neuron(List<Neuron> inputNeurons)
: this()
{
foreach (var inputNeuron in inputNeurons)
{
var synapse = new Synapse(inputNeuron, this);
inputNeuron.OutputSynapses.Add(synapse);
InputSynapses.Add(synapse);
}
}
public virtual double CalculateValue()
{
var d = InputSynapses.Sum(a => a.Weight * a.InputNeuron.Value);// + Bias;
return Value = IsHidden ? NeuralNetwork.SigmoidFunction(d) : NeuralNetwork.IdentityFunction(d);
}
public virtual double CalculateDerivative()
{
var d = Value;
return IsHidden ? NeuralNetwork.SigmoidDerivative(d) : NeuralNetwork.IdentityDerivative();
}
public double CalculateError(double target)
{
return target - Value;
}
public double CalculateGradient(double target)
{
return Gradient = CalculateError(target) * CalculateDerivative();
}
public double CalculateGradient()
{
return Gradient = OutputSynapses.Sum(a => a.OutputNeuron.Gradient * a.Weight) * CalculateDerivative();
}
public void UpdateWeights(double learnRate, double momentum)
{
var prevDelta = BiasDelta;
BiasDelta = learnRate * Gradient; // * 1
Bias += BiasDelta + momentum * prevDelta;
foreach (var s in InputSynapses)
{
prevDelta = s.WeightDelta;
s.WeightDelta = learnRate * Gradient * s.InputNeuron.Value;
s.Weight += s.WeightDelta; //;+ momentum * prevDelta;
}
}
}
public class Synapse
{
public Neuron InputNeuron { get; set; }
public Neuron OutputNeuron { get; set; }
public double Weight { get; set; }
public double WeightDelta { get; set; }
public Synapse(Neuron inputNeuron, Neuron outputNeuron)
{
InputNeuron = inputNeuron;
OutputNeuron = outputNeuron;
Weight = NeuralNetwork.NextRandom();
}
}
}
答案 1 :(得分:1)
您真的不需要使用多层网络来解决ax + b = y的问题。单层感知器可以解决问题。
事实上,对于一个简单的问题,你甚至不需要打破真实神经网络的复杂性。看看这篇博文:
http://dynamicnotions.blogspot.co.uk/2009/05/linear-regression-in-c.html
答案 2 :(得分:0)
我没有分析你的代码,它太长了。但我可以给你一个基本问题的答案:
是的,神经网络非常适合这类问题。
事实上,对于f : R -> R
形式的ax+b=y
,你应该使用一个神经元和线性激活功能。不需要三层结构,只需一个神经元即可。如果您的代码在这种情况下失败,那么您会遇到实现错误,因为它是使用梯度下降解决的简单线性回归任务。