Why don't the parameters estimated by gradient descent on the ridge regression cost function match the parameters returned by the standard glmnet package?
I have implemented a function that estimates the parameters of a ridge linear regression using gradient descent. The code is shown below.
##############################################################################################################
# Function to estimate the weights of a multiple linear regression problem with an L2 penalty on the        #
# coefficients (ridge regression) using gradient descent.                                                   #
##############################################################################################################
gradient_ridge <- function(train.Y = NULL, train.X = NULL, alpha = 0.001,
                           tolerance = 0.01, lambda = 0)
{
  # Check whether the response variable is NULL
  if (is.null(train.Y)) {
    stop("Response variable cannot be NULL")
  }
  # Check whether the predictor variables are NULL
  if (is.null(train.X)) {
    stop("Predictor variables cannot be NULL")
  }
  # Pass the input data through a user-defined function that converts
  # categorical columns into dummy variables
  train.X <- input.split(df = train.X)
  # Prepend a column of ones for the intercept
  train.X <- cbind(1, train.X)
  nweights <- ncol(train.X)
  delta <- 1
  weight <- matrix(rep(0, nweights), nrow = nweights)
  # Iterate until the squared update step falls below the tolerance
  while (sum(delta^2) > tolerance) {
    # Negative gradient of the ridge cost
    #   J(w) = (1/2) * ||y - Xw||^2 + (lambda/2) * ||w[-1]||^2,
    # where the intercept w[1] is excluded from the penalty
    delta <- t(train.X) %*% (train.Y - train.X %*% weight) -
      lambda * diag(nweights) %*% c(0, weight[-1])
    # Update the weights for this iteration
    weight <- weight + alpha * delta
  }
  # Finally, return the estimated weights to the caller
  return(weight)
}
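To sanity-check the gradient descent implementation independently of glmnet, the same cost can also be minimized in closed form. A minimal sketch (the function name is my own; it assumes the same design matrix that input.split builds, with an unpenalized intercept):

# Closed-form ridge solution for the same cost, usable as a convergence check:
# solve (X'X + lambda * D) w = X'y, where D is the identity matrix with a zero
# in the intercept position so the intercept stays unpenalized.
ridge_closed_form <- function(train.Y, train.X, lambda = 0) {
  X <- cbind(1, input.split(df = train.X))
  D <- diag(ncol(X))
  D[1, 1] <- 0
  solve(t(X) %*% X + lambda * D, t(X) %*% train.Y)
}

If gradient_ridge() has converged, its output should agree with this solution up to the chosen tolerance.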
When I compare the weights estimated by the code above with the weights returned by the glmnet package, they do not match. Can anyone tell me whether I should expect my estimates to match the results from the glmnet package at all?
I tried to compare the parameters in the following way:
library(glmnet)
data <- mtcars
data$cyl <- as.factor(data$cyl)
a <- gradient_ridge(train.Y = data$mpg, train.X = data[, -1], lambda = 20)
a  # gradient_ridge() returns a plain matrix, so there is no $coefficients slot
data <- as.data.frame(model.matrix(~., data)[, -1])
ridge <- glmnet(as.matrix(data[, -1]), data$mpg, alpha = 0, lambda = 20)
coef(ridge)
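One difference I am aware of: for alpha = 0, glmnet minimizes (1/(2N)) * RSS + (lambda/2) * ||beta||^2 and standardizes the predictors internally by default, so its lambda is on a per-observation scale rather than the scale used in my cost function. If that is right, dividing lambda by the sample size and turning off the internal standardization should bring the two fits closer (a sketch, not a guaranteed exact match):

# Sketch of a fairer comparison: rescale lambda by the sample size and
# disable glmnet's internal standardization. Exact agreement still depends
# on both sides scaling the predictors the same way.
n <- nrow(data)
ridge.rescaled <- glmnet(as.matrix(data[, -1]), data$mpg, alpha = 0,
                         lambda = 20 / n, standardize = FALSE)
coef(ridge.rescaled)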
The code for the input.split function:
input.split <- function(df)
{
  df.categorical <- as.data.frame(matrix(NA, nrow(df), 0))
  df.binary <- matrix(NA, nrow(df), 0)
  colNames.categorical <- vector()
  colnames.binary <- vector()
  categorical.index <- 1
  binary.index <- 1
  for (i in 1:ncol(df)) {
    # Extract factor columns
    if (is.factor(df[, i])) {
      df.categorical <- cbind(df.categorical, df[, i])
      colNames.categorical[categorical.index] <- colnames(df)[i]
      categorical.index <- categorical.index + 1
    }
    # Extract columns containing only the binary values 0 and 1
    if (sum(is.element(c(0, 1), sort(unique(df[, i])))) == 2 &&
        length(sort(unique(df[, i]))) == 2) {
      df.binary <- cbind(df.binary, df[, i])
      colnames.binary[binary.index] <- colnames(df)[i]
      binary.index <- binary.index + 1
    }
  }
  colnames(df.categorical) <- colNames.categorical
  colnames(df.binary) <- colnames.binary
  # The remaining columns are numerical: centre and scale them
  num.columns <- setdiff(colnames(df), union(colNames.categorical, colnames.binary))
  df.numerical <- scale(df[, num.columns])
  # Expand the factor columns into dummy variables and drop the intercept
  df.categorical <- model.matrix(~., data = df.categorical)
  df.categorical <- df.categorical[, -1]
  # Combine the scaled numerical, dummy, and binary columns into one matrix
  final_data <- as.matrix(cbind(df.numerical, df.categorical, df.binary))
  return(final_data)
}
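For illustration, a quick look at what this helper produces for the mtcars predictors (with cyl converted to a factor, as above): the numeric columns come back centred and scaled, cyl becomes two dummy columns, and the 0/1 columns vs and am pass through unchanged.

# Inspect the design matrix built for the mtcars predictors
data <- mtcars
data$cyl <- as.factor(data$cyl)
head(input.split(df = data[, -1]))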