我正在尝试为y=e^(-(x-u)^2)/(2*o^2))
和u = 50
所在的函数o = 15
创建一个神经网络。
我必须训练我的神经网络,这样我才能找到每个y的2 x。我已经创建了下拉代码,它似乎很好地学习它,但是一旦我测试了输出,我只得到0.99到1的数字,我应该得到25和75,我只是看不清楚原因。我最好的猜测是我的错误纠正错误,但找不到错误。神经网络使用反向传播。
测试代码和培训集
class Program
{
static void Main(string[] args)
{
args = new string[] {
"c:\\testTrain.csv",
"c:\\testValues.csv"
};
// Output File
string fileTrainPath = null;
string fileValuesPath = null;
if (args.Length > 0)
{
fileTrainPath = args[0];
if (File.Exists(fileTrainPath))
File.Delete(fileTrainPath);
fileValuesPath = args[1];
if (File.Exists(fileValuesPath))
File.Delete(fileValuesPath);
}
double learningRate = 0.1;
double u = 50;
double o = 15;
Random rand = new Random();
Network net = new Network(1, 8, 4, 2);
NetworkTrainer netTrainer = new NetworkTrainer(learningRate, net);
List<TrainerSet> TrainerSets = new List<TrainerSet>();
for(int i = 0; i <= 20; i++)
{
double random = rand.NextDouble();
TrainerSets.Add(new TrainerSet(){
Inputs = new double[] { random },
Outputs = getX(random, u, o)
});
}
// Train Network
string fileTrainValue = String.Empty;
for (int i = 0; i <= 10000; i++)
{
if (i == 5000)
{ }
double error = netTrainer.RunEpoch(TrainerSets);
Console.WriteLine("Epoch " + i + ": Error = " + error);
if(fileTrainPath != null)
fileTrainValue += i + "," + learningRate + "," + error + "\n";
}
if (fileTrainPath != null)
File.WriteAllText(fileTrainPath, fileTrainValue);
// Test Network
string fileValuesValue = String.Empty;
for (int i = 0; i <= 100; i++)
{
double y = rand.NextDouble();
double[] dOutput = getX(y, u, o);
double[] Output = net.Compute(new double[] { y });
if (fileValuesPath != null)
fileValuesValue += i + "," + y + "," + dOutput[0] + "," + dOutput[1] + "," + Output[0] + "," + Output[1] + "\n";
}
if (fileValuesPath != null)
File.WriteAllText(fileValuesPath, fileValuesValue);
}
public static double getResult(int x, double u, double o)
{
return Math.Exp(-Math.Pow(x-u,2)/(2*Math.Pow(o,2)));
}
public static double[] getX(double y, double u, double o)
{
return new double[] {
u + Math.Sqrt(2 * Math.Pow(o, 2) * Math.Log(1/y)),
u - Math.Sqrt(2 * Math.Pow(o, 2) * Math.Log(1/y)),
};
}
}
网络背后的代码
public class Network
{
protected int inputsCount;
protected int layersCount;
protected NetworkLayer[] layers;
protected double[] output;
public int Count
{
get
{
return layers.Count();
}
}
public NetworkLayer this[int index]
{
get { return layers[index]; }
}
public Network(int inputsCount, params int[] neuronsCount)
{
this.inputsCount = Math.Max(1, inputsCount);
this.layersCount = Math.Max(1, neuronsCount.Length);
layers = new NetworkLayer[neuronsCount.Length];
for (int i = 0; i < layersCount; i++)
layers[i] = new NetworkLayer(neuronsCount[i],
(i == 0) ? inputsCount : neuronsCount[i - 1]);
}
public virtual double[] Compute(double[] input)
{
output = input;
foreach (NetworkLayer layer in layers)
output = layer.Compute(output);
return output;
}
}
public class NetworkLayer
{
protected int inputsCount = 0;
protected int neuronsCount = 0;
protected Neuron[] neurons;
protected double[] output;
public Neuron this[int index]
{
get { return neurons[index]; }
}
public int Count
{
get { return neurons.Length; }
}
public int Inputs
{
get { return inputsCount; }
}
public double[] Output
{
get { return output; }
}
public NetworkLayer(int neuronsCount, int inputsCount)
{
this.inputsCount = Math.Max( 1, inputsCount );
this.neuronsCount = Math.Max( 1, neuronsCount );
neurons = new Neuron[this.neuronsCount];
output = new double[this.neuronsCount];
// create each neuron
for (int i = 0; i < neuronsCount; i++)
neurons[i] = new Neuron(inputsCount);
}
public virtual double[] Compute(double[] input)
{
// compute each neuron
for (int i = 0; i < neuronsCount; i++)
output[i] = neurons[i].Compute(input);
return output;
}
}
public class Neuron
{
protected static Random rand = new Random((int)DateTime.Now.Ticks);
public int Inputs;
public double[] Input;
public double[] Weights;
public double Output = 0;
public double Threshold;
public double Error;
public Neuron(int inputs)
{
this.Inputs = inputs;
Weights = new double[inputs];
for (int i = 0; i < inputs; i++)
Weights[i] = rand.NextDouble() * 0.5;
}
public double Compute(double[] inputs)
{
Input = inputs;
double e = 0.0;
for (int i = 0; i < inputs.Length; i++)
e += Weights[i] * inputs[i];
e -= Threshold;
return (Output = sigmoid(e));
}
private double sigmoid(double value)
{
return (1 / (1 + Math.Exp(-1 * value)));
//return 1 / (1 + Math.Exp(-value));
}
}
我的培训师
public class NetworkTrainer
{
private Network network;
private double learningRate = 0.1;
public NetworkTrainer(double a, Network network)
{
this.network = network;
this.learningRate = a;
}
public double Run(double[] input, double[] output)
{
network.Compute(input);
return CorrectErrors(output);
}
public double RunEpoch(List<TrainerSet> sets)
{
double error = 0.0;
for (int i = 0, n = sets.Count; i < n; i++)
error += Run(sets[i].Inputs, sets[i].Outputs);
// return summary error
return error;
}
private double CorrectErrors(double[] desiredOutput)
{
double[] errorLast = new double[desiredOutput.Length];
NetworkLayer lastLayer = network[network.Count - 1];
for (int i = 0; i < desiredOutput.Length; i++)
{
// S(p)=y(p)*[1-y(p)]*(yd(p)-y(p))
lastLayer[i].Error = lastLayer[i].Output * (1-lastLayer[i].Output)*(desiredOutput[i] - lastLayer[i].Output);
errorLast[i] = lastLayer[i].Error;
}
// Calculate errors
for (int l = network.Count - 2; l >= 0; l--)
{
for (int n = 0; n < network[l].Count; n++)
{
double newError = 0;
for (int np = 0; np < network[l + 1].Count; np++)
{
newError += network[l + 1][np].Weights[n] * network[l + 1][np].Error;
}
network[l][n].Error = newError;
}
}
// Update Weights
// w = w + (a * input * error)
for (int l = network.Count - 1; l >= 0; l--)
{
for (int n = 0; n < network[l].Count; n++)
{
for (int i = 0; i < network[l][n].Inputs; i++)
{
// deltaW = a * y(p) * s(p)
double deltaW = learningRate * network[l][n].Output * network[l][n].Error;
network[l][n].Weights[i] += deltaW;
}
}
}
double returnError = 0;
foreach (double e in errorLast)
returnError += e;
return returnError;
}
}
答案 0 :(得分:1)
对于回归问题,您的输出层应具有标识(或至少是线性)激活函数。这样您就不必缩放输出。 identity函数的导数为1,因此输出层的导数dE / da_i为y-t(lastLayer[i].Output - desiredOutput[i]
)。