Batch gradient descent implementation

Time: 2020-10-13 14:26:47

Tags: c neural-network

I need to implement batch gradient descent, but the network's error does not decrease. Could you please help me spot the problem? I have a similar version using stochastic gradient descent that works well.
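For context, the two schemes differ only in when the update is applied: stochastic gradient descent changes the weights after every sample, while batch gradient descent sums the per-sample gradients first and applies one update per pass over the data,

$$\Delta w = -\eta \sum_{p=1}^{P} \frac{\partial E_p}{\partial w} \quad\text{(batch)} \qquad\text{vs.}\qquad \Delta w = -\eta\,\frac{\partial E_p}{\partial w}\ \text{after each sample } p \quad\text{(stochastic)}.$$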

Here is the network definition:

#include <stdio.h>
#include <stdlib.h>

// number of inputs
#define IN 2
// number of neurons in the hidden layer
#define HID 8
// number of outputs
#define OUT 1
// learning rate
#define LEARNINGRATE 0.1

typedef struct
{
   double input[IN]; // inputs
   double hidden[HID]; // hidden layer
   double output[OUT]; // outputs
} TResults;

typedef struct
{
   double weightH[HID][IN]; // hidden layer weights
   double biasesH[HID]; // hidden layer biases
   double weightO[OUT][HID]; // output layer weights
   double biasesO[OUT]; // output layer biases
} TWeights;

This function sets all the gradients to 0, ready for summation.

void ClearGradients(TWeights *grad)
{
   for (int i=0;i<HID;i++)
   {
      for (int j=0;j<IN;j++)
         grad->weightH[i][j]=0;
      grad->biasesH[i]=0;
   }
   for (int i=0;i<OUT;i++)
   {
      for (int j=0;j<HID;j++)
         grad->weightO[i][j]=0;
      grad->biasesO[i]=0;
   }
}
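As a side note, since TWeights contains only doubles, a minimal alternative sketch (assuming an IEEE-754 platform, where the all-zero byte pattern encodes the double 0.0) would be:

#include <string.h>

// Hypothetical shortcut: zero the whole struct in one call.
// Valid on IEEE-754 platforms where all-zero bytes are the double 0.0.
void ClearGradientsFast(TWeights *grad)
{
   memset(grad, 0, sizeof(*grad));
}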

This part handles the network computation (the forward pass).

// ReLU activation
static inline double Activation(double x)
{
   return x>0?x:0;
}

// ReLU derivative. Passing the activated value is fine here, since a
// ReLU output is positive exactly when its pre-activation is positive.
static inline double Derivative(double x)
{
   return x>0?1:0;
}

double NetworkResult(double inp1,double inp2,TWeights *weights,TResults *results)
{
   // load the inputs
   results->input[0]=inp1;
   results->input[1]=inp2;
   // compute hidden layer
   for (int i=0;i<HID;i++)
   {
      results->hidden[i]=weights->biasesH[i];
      for (int j=0;j<IN;j++)
         results->hidden[i] += results->input[j]*weights->weightH[i][j];
      results->hidden[i]=Activation(results->hidden[i]);
   }
   // compute output
   for (int i=0;i<OUT;i++)
   {
      results->output[i]=weights->biasesO[i];
      for (int j=0;j<HID;j++)
         results->output[i] += results->hidden[j]*weights->weightO[i][j];
      results->output[i]=Activation(results->output[i]);
   }
   return results->output[0];
}
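In equation form, the forward pass above computes

$$h_i = \max\Big(0,\; b^H_i + \sum_{j=1}^{IN} w^H_{ij}\, x_j\Big), \qquad y_k = \max\Big(0,\; b^O_k + \sum_{i=1}^{HID} w^O_{ki}\, h_i\Big),$$

i.e. a ReLU on both the hidden layer and the output.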

This part computes the gradients for one sample and accumulates (sums) them.

void CalculateGradients(double inp1,double inp2,double result,double *error,TWeights *weights,TWeights *grad,TResults *results)
{
   NetworkResult(inp1,inp2,weights,results);
   double DeltaO[OUT];
   double DeltaH[HID];
   // layer output
   double err= result-results->output[0];
   *error+=err*err*0.5;
   DeltaO[0]=err*Derivative(results->output[0]);
   // layer hidden
   for (int i=0;i<HID;i++)
   {
      double err=0;
      for (int j=0;j<OUT;j++)
         err+= DeltaO[j]*weights->weightO[j][i];
      DeltaH[i]=err*Derivative(results->hidden[i]);
   }
   // update gradients
   // layer output
   for (int i=0;i<OUT;i++)
   {
      for (int j=0;j<HID;j++)
         grad->weightO[i][j]+=DeltaO[i]*results->hidden[j];
      grad->biasesO[i]+=DeltaO[i];
   }
   // layer hidden
   for (int i=0;i<HID;i++)
   {
      for (int j=0;j<IN;j++)
         grad->weightH[i][j]+=DeltaH[i]*results->input[j];
      grad->biasesH[i]+=DeltaH[i];
   }
}
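In equation form, for one sample with target $t$ and output $y$, the code accumulates

$$E = \tfrac{1}{2}(t-y)^2, \qquad \delta^O = (t-y)\,f'(y), \qquad \delta^H_i = f'(h_i)\sum_{k} \delta^O_k\, w^O_{ki},$$

with per-weight contributions $\delta^O_k h_i$ and $\delta^H_i x_j$. Note that $\delta^O = (t-y)f'$ already carries the minus sign of $-\partial E/\partial y$, which is why UpdateWeights below adds rather than subtracts the accumulated terms.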

The weights of the network are updated with the summed gradients.

void UpdateWeights(TWeights *weights,TWeights *grad,const double learningrate)
{
   for (int i=0;i<OUT;i++)
   {
      for (int j=0;j<HID;j++)
         weights->weightO[i][j]+=learningrate*grad->weightO[i][j];
      weights->biasesO[i]+=learningrate*grad->biasesO[i];
   }
   // hidden layer
   for (int i=0;i<HID;i++)
   {
      for (int j=0;j<IN;j++)
         weights->weightH[i][j]+=learningrate*grad->weightH[i][j];
      weights->biasesH[i]+=learningrate*grad->biasesH[i];
   }
}
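One detail worth noting when comparing against the SGD version: the update applies the raw sum of the four per-sample gradients, so the effective step per epoch is four times larger than a single SGD step at the same LEARNINGRATE. A common convention (shown here only as a hypothetical sketch, not as a confirmed fix) is to average over the batch:

// Hypothetical variant: divide the accumulated gradients by the batch
// size so the effective step does not grow with the number of samples.
void UpdateWeightsAveraged(TWeights *weights, TWeights *grad,
                           double learningrate, int batchsize)
{
   double scale = learningrate/(double)batchsize;
   for (int i=0;i<OUT;i++)
   {
      for (int j=0;j<HID;j++)
         weights->weightO[i][j] += scale*grad->weightO[i][j];
      weights->biasesO[i] += scale*grad->biasesO[i];
   }
   for (int i=0;i<HID;i++)
   {
      for (int j=0;j<IN;j++)
         weights->weightH[i][j] += scale*grad->weightH[i][j];
      weights->biasesH[i] += scale*grad->biasesH[i];
   }
}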

The main program trains the network and prints the number of epochs needed to reach a low error.

int main(int argc, char *argv[])
{
   TWeights Weights;
   TWeights Grad;
   srand(1);
   // initialize random weights and biases
   for (int i=0;i<HID;i++)
   {
      for (int j=0;j<IN;j++)
         Weights.weightH[i][j]= 2.0 * ( (rand()/((double)RAND_MAX)) - 0.5 ) * 0.1;
      Weights.biasesH[i]=0.1;
   }
   for (int i=0;i<OUT;i++)
   {
      for (int j=0;j<HID;j++)
         Weights.weightO[i][j]= 2.0 * ( (rand()/((double)RAND_MAX)) - 0.5 ) * 0.1;
      Weights.biasesO[i]=0.1;
   }
   TResults Results;
   // calculate the results of the network with the random weights
   printf("0 0 = %f\n",NetworkResult(0,0,&Weights,&Results));
   printf("0 1 = %f\n",NetworkResult(0,1,&Weights,&Results));
   printf("1 0 = %f\n",NetworkResult(1,0,&Weights,&Results));
   printf("1 1 = %f\n",NetworkResult(1,1,&Weights,&Results));
   printf("\n");
   // Train the net to recognize an xor operation
   int epochs;
   for (epochs=0;epochs<1000000;epochs++)
   {
      double error=0;
      // set all gradients to 0
      ClearGradients(&Grad);
      // calculate the gradients and sum
      CalculateGradients(0,0,0,&error,&Weights,&Grad,&Results); // input 0 0 result 0
      CalculateGradients(0,1,1,&error,&Weights,&Grad,&Results); // input 0 1 result 1
      CalculateGradients(1,0,1,&error,&Weights,&Grad,&Results); // input 1 0 result 1
      CalculateGradients(1,1,0,&error,&Weights,&Grad,&Results); // input 1 1 result 0
      // at the end of the 4 samples, update the weights
      UpdateWeights(&Weights,&Grad,LEARNINGRATE);
      if (error<0.0001) break; // exit the training with a low error
   }
   // calculate the network results after the train
   printf("After %d epochs\n",epochs);
   printf("0 0 = %f\n",NetworkResult(0,0,&Weights,&Results));
   printf("0 1 = %f\n",NetworkResult(0,1,&Weights,&Results));
   printf("1 0 = %f\n",NetworkResult(1,0,&Weights,&Results));
   printf("1 1 = %f\n",NetworkResult(1,1,&Weights,&Results));
   printf("\n");
   return 0;
}
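For reference, the whole thing builds as a single translation unit, e.g. with gcc -std=c99 -O2 xor.c -o xor (assuming the file is saved as xor.c).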
