Relationship between loss and features in a gradient descent computation

Asked: 2015-03-08 06:06:23

Tags: java gradient-descent

My teammate and I are trying to write an implementation of gradient descent, and I think we're pretty close.

We're (trying to) follow the steps from the first answer to this question, namely:

1. Calculate the hypothesis h = X * theta

2. Calculate the loss = h - y, and maybe the squared cost (loss^2)/2m

3. Calculate the gradient = X' * loss / m

4. Update the parameters theta = theta - alpha * gradient
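
To check our understanding, here is a minimal plain-Java sketch of how we currently read those four steps for ordinary linear regression. All the names are ours, and we're assuming X' means the transpose of X (which is part of what we're asking about below):

// Our reading of steps 1-4 (a sketch, not verified): one batch update
// for linear regression. X is m x n, y has m entries, theta has n.
static void batchGradientDescentStep(double[][] X, double[] y, double[] theta, double alpha)
{
  int m = X.length;        // number of training examples
  int n = theta.length;    // number of features

  // Steps 1 and 2: hypothesis h = X * theta, then loss = h - y
  double[] loss = new double[m];
  for (int p = 0; p < m; p++)
  {
    double h = 0.0;
    for (int i = 0; i < n; i++)
    {
      h += X[p][i] * theta[i];
    }
    loss[p] = h - y[p];
  }

  // Step 3: gradient = X' * loss / m, where we assume X' is the
  // transpose of X, so gradient[i] sums feature i over all examples
  double[] gradient = new double[n];
  for (int i = 0; i < n; i++)
  {
    for (int p = 0; p < m; p++)
    {
      gradient[i] += X[p][i] * loss[p];
    }
    gradient[i] /= m;
  }

  // Step 4: theta = theta - alpha * gradient
  for (int i = 0; i < n; i++)
  {
    theta[i] -= alpha * gradient[i];
  }
}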

But as you can see from our code below, we are somewhat at a loss as to how to calculate the gradient. Did we set it up correctly?

How should that calculation be performed?

And what is the difference between X' and X?

  double loss, cost, hypothesis;
  int p, iteration;

  iteration = 0;
  do 
  {
    iteration++;
    cost = 0.0;
    //loop through all instances (complete one epoch)
    for (p = 0; p < number_of_files__train; p++) 
    {

      hypothesis = calculateHypothesis( weights, feature_matrix__train, p, globo_dict_size );

      loss = outputs__train[p] - hypothesis;

      //update each weight (this is the part we're unsure about)
      for (int i = 0; i < globo_dict_size; i++) 
      {
        weights[i] += LEARNING_RATE * loss * feature_matrix__train[p][i] * calculateGradent( weights, i, number_of_files__train, loss );
      }

      //summation of squared error (error value for all instances)
      cost += (loss*loss);
    }
    cost = cost/(2 * number_of_files__train);

  } 
  while(cost != 0 && iteration<=MAX_ITER);


}

static double calculateHypothesis( double weights[], double[][] feature_matrix, int file_index, int globo_dict_size )
{
  //# m denotes the number of examples here, not the number of features

  double sum = 0.0;

  //weighted sum of the features of one training instance
  for (int i = 0; i < globo_dict_size; i++) 
  {
    sum += ( weights[i] * feature_matrix[file_index][i] );
  }
  //bias weight, stored in the extra slot at the end of the array
  sum += weights[ globo_dict_size ];

  return sigmoid(sum);
}

private static double sigmoid(double x)
{
    return 1 / (1 + Math.exp(-x));
}
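
One identity we've been leaning on while trying to work out the update by hand (this is a standard property of the logistic function, not something from our code): the derivative of sigmoid(x) is sigmoid(x) * (1 - sigmoid(x)). A helper like this could compute it:

// Derivative of the logistic function: d/dx sigmoid(x) = s * (1 - s).
// (Standard identity; it shows up when differentiating the
// logistic-regression cost.)
private static double sigmoidDerivative(double x)
{
    double s = 1 / (1 + Math.exp(-x));
    return s * (1 - s);
}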

static double calculateGradent( double weights[], int i, int number_of_files__train, double loss )
{
  return weights[i] * loss / number_of_files__train;
}
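
For what it's worth, our reading of step 3 (gradient = X' * loss / m) suggests the per-weight term should use the feature value rather than the weight itself, something like the hypothetical helper below. We're not at all sure this is right, which is part of what we're asking:

// Hypothetical alternative (our guess, not verified): the gradient term
// for weight i on example p uses the feature value, not weights[i].
static double gradientTermGuess( double[][] feature_matrix, int p, int i,
                                 int number_of_files__train, double loss )
{
  return feature_matrix[p][i] * loss / number_of_files__train;
}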

0 Answers:

There are no answers yet.