Question

I am trying to code gradient descent in R. The goal is to collect a data frame of each estimate so I can plot the algorithm's search through the parameter space.

I am using the built-in dataset data(cars) in R. Unfortunately something is way off in my function. The estimates just increase linearly with each iteration! But I cannot figure out where I err.

Any tips?

Code:

GradientDescent <- function(b0_start, b1_start, x, y, niter=10, alpha=0.1) {
  # initialize
  gradient_b0 = 0
  gradient_b1 = 0
  x <- as.matrix(x)
  y <- as.matrix(y)
  N = length(y)
  results <- matrix(nrow=niter, ncol=2)
  # gradient
  for(i in 1:N){
    gradient_b0 <- gradient_b0 + (-2/N) * (y[i] - (b0_start + b1_start*x[i]))
    gradient_b1 <- gradient_b1 + (-2/N) * x[i] * (y[i] - (b0_start + b1_start*x[i]))
  }
  # descent
  b0_hat <- b0_start
  b1_hat <- b1_start
  for(i in 1:niter){
    b0_hat <- b0_hat - (alpha*gradient_b0)
    b1_hat <- b1_hat - (alpha*gradient_b1)
    # collect 
    results[i,] <- c(b0_hat,b1_hat)
  }
  # return
  df <- data.frame(results)
  colnames(df) <- c("b0", "b1")
  return(df)
}

> test <- GradientDescent(0,0,cars$speed, cars$dist, niter=1000)
> head(test,2); tail(test,2)
      b0      b1
1  8.596 153.928
2 17.192 307.856
           b0       b1
999  8587.404 153774.1
1000 8596.000 153928.0

Answer 1

以下是cars数据集的解决方案：

# dependent and independent variables
y <- cars$dist
x <- cars$speed

# number of iterations
iter_n <- 100

# initial value of the parameter
theta1 <- 0

# learning rate
alpha <- 0.001
m <- nrow(cars)
yhat <- theta1*x

# a tibble to record the parameter update and cost
library(tibble)
results <- data_frame(theta1 = as.numeric(),
                 cost = NA,
                 iteration = 1)

# run the gradient descent
for (i in 1:iter_n){
                theta1 <- theta1 - alpha * ((1 / m) * (sum((yhat - y) * x)))
                yhat <- theta1*x
                cost <- (1/m)*sum((yhat-y)^2)
                results[i, 1] = theta1
                results[i, 2] <- cost
                results[i, 3] <- i
}

# print the parameter value after the defined iteration
print(theta1)
# 2.909132

检查成本是否在下降：

library(ggplot2)
 ggplot(results, aes(x = iteration, y = cost))+
                geom_line()+
                geom_point()

我写了一篇更详细的博文here。

coding gradient descent in R

1 个答案: