I need to implement batch gradient descent, but the network's error does not decrease. Could you please help me spot the problem? I have a similar version using stochastic gradient descent, and it works fine.
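For reference, the only structural difference from the SGD version is where the weight update happens; roughly, using the functions defined below (a sketch, not my actual SGD code):

// SGD: clear, accumulate a single sample, update -- once per sample
ClearGradients(&Grad);
CalculateGradients(0,0,0,&error,&Weights,&Grad,&Results);
UpdateWeights(&Weights,&Grad,LEARNINGRATE);
// batch (this post): clear once, accumulate all four samples, then update once per epoch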
Here is the network definition:
#include <stdio.h>
#include <stdlib.h>
// number of inputs
#define IN 2
// number of neurons in the hidden layer
#define HID 8
// number of outputs
#define OUT 1
// learning rate
#define LEARNINGRATE 0.1
typedef struct
{
double input[IN]; // input
double hidden[HID]; // hidden layer
double output[OUT]; // output
} TResults;
typedef struct
{
double weightH[HID][IN]; // hidden layer weights
double biasesH[HID]; // hidden layer biases
double weightO[OUT][HID]; // output layer weights
double biasesO[OUT]; // output layer biases
} TWeights;
This function resets all the gradients to 0 so they can be accumulated:
void ClearGradients(TWeights *grad)
{
for (int i=0;i<HID;i++)
{
for (int j=0;j<IN;j++)
grad->weightH[i][j]=0;
grad->biasesH[i]=0;
}
for (int i=0;i<OUT;i++)
{
for (int j=0;j<HID;j++)
grad->weightO[i][j]=0;
grad->biasesO[i]=0;
}
}
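As an aside, since TWeights contains only doubles, the same clearing could be done with a single memset on IEEE-754 platforms, where all-zero bytes encode 0.0 (a hypothetical alternative, not something the code relies on):

#include <string.h>
// hypothetical drop-in replacement for ClearGradients
void ClearGradientsMemset(TWeights *grad)
{
memset(grad, 0, sizeof *grad); // all-zero bytes represent 0.0 for IEEE-754 doubles
}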
This part performs the network's forward computation:
// ReLU activation
static inline double Activation(double x)
{
return x>0?x:0;
}
// derivative of ReLU (taken as 0 at x<=0)
static inline double Derivative(double x)
{
return x>0?1:0;
}
double NetworkResult(double inp1,double inp2,TWeights *weights,TResults *results)
{
// load the inputs
results->input[0]=inp1;
results->input[1]=inp2;
// compute hidden layer
for (int i=0;i<HID;i++)
{
results->hidden[i]=weights->biasesH[i];
for (int j=0;j<IN;j++)
results->hidden[i] += results->input[j]*weights->weightH[i][j];
results->hidden[i]=Activation(results->hidden[i]);
}
// compute output
for (int i=0;i<OUT;i++)
{
results->output[i]=weights->biasesO[i];
for (int j=0;j<HID;j++)
results->output[i] += results->hidden[j]*weights->weightO[i][j];
results->output[i]=Activation(results->output[i]);
}
return results->output[0];
}
This part computes all the gradients and sums them:
void CalculateGradients(double inp1,double inp2,double result,double *error,TWeights *weights,TWeights *grad,TResults *results)
{
NetworkResult(inp1,inp2,weights,results);
double DeltaO[OUT];
double DeltaH[HID];
// layer output
double err= result-results->output[0];
*error+=err*err*0.5;
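// note: Derivative receives the post-activation value; for ReLU this is
// equivalent to using the pre-activation, since both have the same sign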
DeltaO[0]=err*Derivative(results->output[0]);
// layer hidden
for (int i=0;i<HID;i++)
{
double err=0;
for (int j=0;j<OUT;j++)
err+= DeltaO[j]*weights->weightO[j][i];
DeltaH[i]=err*Derivative(results->hidden[i]);
}
// update gradients
// layer output
for (int i=0;i<OUT;i++)
{
for (int j=0;j<HID;j++)
grad->weightO[i][j]+=DeltaO[i]*results->hidden[j];
grad->biasesO[i]+=DeltaO[i];
}
// layer hidden
for (int i=0;i<HID;i++)
{
for (int j=0;j<IN;j++)
grad->weightH[i][j]+=DeltaH[i]*results->input[j];
grad->biasesH[i]+=DeltaH[i];
}
}
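To make the sign convention explicit: because err = result - output, grad accumulates the negative gradient of E = err²/2, which is why UpdateWeights adds it with a plus sign. In standard backprop notation (the symbols are mine: f is the ReLU above, z pre-activations, h hidden activations, x inputs, t the target):

\[
\delta^{O}_{k} = (t_k - y_k)\, f'(z^{O}_{k}), \qquad
\delta^{H}_{i} = f'(z^{H}_{i}) \sum_{k} \delta^{O}_{k}\, w^{O}_{ki}, \qquad
\Delta w^{O}_{kj} = \delta^{O}_{k}\, h_{j}, \qquad
\Delta w^{H}_{ij} = \delta^{H}_{i}\, x_{j}
\]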
The network's weights are updated with the summed gradients:
void UpdateWeights(TWeights *weights,TWeights *grad,const double learningrate)
{
for (int i=0;i<OUT;i++)
{
for (int j=0;j<HID;j++)
weights->weightO[i][j]+=learningrate*grad->weightO[i][j];
weights->biasesO[i]+=learningrate*grad->biasesO[i];
}
// layer hidden
for (int i=0;i<HID;i++)
{
for (int j=0;j<IN;j++)
weights->weightH[i][j]+=learningrate*grad->weightH[i][j];
weights->biasesH[i]+=learningrate*grad->biasesH[i];
}
}
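For comparison, some batch formulations average the accumulated gradients instead of summing them, i.e. they scale the step by 1/N with N the batch size (4 samples here). A sketch of that variant at the call site, purely as a reference point (BATCHSIZE is an illustrative name):

#define BATCHSIZE 4 // the four XOR samples
// average instead of sum: divide the learning rate by the batch size
UpdateWeights(&Weights,&Grad,LEARNINGRATE/BATCHSIZE);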
The main program trains the network and prints the number of epochs needed to bring the error down.
int main(int argc, char *argv[])
{
TWeights Weights;
TWeights Grad;
srand(1);
// initialize random weights and biases
for (int i=0;i<HID;i++)
{
for (int j=0;j<IN;j++)
Weights.weightH[i][j]= 2.0 * ( (rand()/((double)RAND_MAX)) - 0.5 ) * 0.1;
Weights.biasesH[i]=0.1;
}
for (int i=0;i<OUT;i++)
{
for (int j=0;j<HID;j++)
Weights.weightO[i][j]= 2.0 * ( (rand()/((double)RAND_MAX)) - 0.5 ) * 0.1;
Weights.biasesO[i]=0.1;
}
TResults Results;
// calculate the results of the network with the random weights
printf("0 0 = %f\n",NetworkResult(0,0,&Weights,&Results));
printf("0 1 = %f\n",NetworkResult(0,1,&Weights,&Results));
printf("1 0 = %f\n",NetworkResult(1,0,&Weights,&Results));
printf("1 1 = %f\n",NetworkResult(1,1,&Weights,&Results));
printf("\n");
// Train the net to learn the XOR function
int epochs;
for (epochs=0;epochs<1000000;epochs++)
{
double error=0;
// set all gradients to 0
ClearGradients(&Grad);
// calculate the gradients and sum
CalculateGradients(0,0,0,&error,&Weights,&Grad,&Results); // input 0 0 result 0
CalculateGradients(0,1,1,&error,&Weights,&Grad,&Results); // input 0 1 result 1
CalculateGradients(1,0,1,&error,&Weights,&Grad,&Results); // input 1 0 result 1
CalculateGradients(1,1,0,&error,&Weights,&Grad,&Results); // input 1 1 result 0
// after all 4 samples have been accumulated, update the weights once
UpdateWeights(&Weights,&Grad,LEARNINGRATE);
if (error<0.0001) break; // stop training once the error is low enough
}
// calculate the network results after training
printf("After %d epochs\n",epochs);
printf("0 0 = %f\n",NetworkResult(0,0,&Weights,&Results));
printf("0 1 = %f\n",NetworkResult(0,1,&Weights,&Results));
printf("1 0 = %f\n",NetworkResult(1,0,&Weights,&Results));
printf("1 1 = %f\n",NetworkResult(1,1,&Weights,&Results));
printf("\n");
return 0;
}
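In case it matters, this is how I build and run it (assuming the file is named xor.c and gcc is available):

gcc -std=c99 -O2 xor.c -o xor
./xor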