如何调用数据框中的列?

时间:2019-03-27 10:29:14

标签: r dataframe logistic-regression

我想为逻辑回归编写自己的绘图函数。 如果我手动输入变量和数据框,则有一个运行的代码。但是,当我尝试编写函数时,出现错误消息。

这是我的有效代码:

logRegEst <- function(x, y, threshold = 1e-10, maxIter = 100)
{
  calcPi <- function(x, beta)
  {
    beta <- as.vector(beta)
    return(exp(x %*% beta) / (1 + exp(x %*% beta)))
  }

  beta <- rep(0, ncol(x))   # initial guess for beta
  diff <- 1000
  # initial value bigger than threshold so that we can enter our while loop
  iterCount = 0
  # counter for the iterations to ensure we're not stuck in an infinite loop

  while(diff > threshold) # tests for convergence
  {
    pi <- as.vector(calcPi(x, beta))
    # calculate pi by using the current estimate of beta

    W <-  diag(pi * (1 - pi))
    # calculate matrix of weights W as defined in the fisher scooring algorithem

    beta_change <- solve(t(x) %*% W %*% x) %*% t(x) %*% (y - pi)
    # calculate the change in beta

    beta <- beta + beta_change   
    #  new beta
    diff <- sum(beta_change^2)
    # calculate how much we changed beta by in this iteration
    # if this is less than threshold, we'll break the while loop

    iterCount <- iterCount + 1
    # see if we've hit the maximum number of iterations
    if(iterCount > maxIter){
      stop("This isn't converging.")
    }
    # stop if we have hit the maximum number of iterations
  }
  n <- length(y)
  df <- length(y) - ncol(x)
  # calculating the degrees of freedom by taking the length of y minus
  # the number of x columns
  vcov <- solve(t(x) %*% W %*% x)
  logLik <- sum(y * log(pi / (1 - pi)) + log(1 - pi))
  deviance <- -2 * logLik
  AIC <- -2 * logLik + 2 * ncol(x)
  rank <- ncol(x)
  #h <-  diag(W^(1/2)%*%x%*%(t(x)%*%W%*%x)^(-1)%*%t(x)%*%W^(1/2))
  #stdresiduals <- est$residuals/(sqrt(1-h))
  #stdresiduals <- W%*%solve(crossprod(W,W),t(W))
  list(coefficients = beta, vcov = vcov, df = df, deviance = deviance,
       AIC = AIC, iter = iterCount - 1, x = x, y = y, n = n, rank = rank)
  # returning results
}

logReg <- function(formula, data)
{
  if (sum(is.na(data)) > 0) {
    print("missing values in data")
  } else {
    mf <- model.frame(formula = formula, data = data)
    # model.frame() returns us a data.frame with the variables needed to use the
    # formula.
    x <- model.matrix(attr(mf, "terms"), data = mf)
    # model.matrix() creates a disign matrix. That means that for example the
    #"Sex"-variable is given as a dummy variable with ones and zeros.
    y <- as.numeric(model.response(mf)) - 1
    # model.response gives us the response variable.
    est <- logRegEst(x, y)
    # Now we have the starting position to apply our function from above.
    est$formula <- formula
    est$call <- match.call()
    # We add the formular and the call to the list.
    nullModel <- logRegEst(x = as.matrix(rep(1, length(y))), y)
    est$nullDeviance <- nullModel$deviance
    est$nullDf <- nullModel$df
    mu <- exp(as.vector(est$x %*% est$coefficients)) /
      (1 + exp(as.vector(est$x %*% est$coefficients)))
    # computing the fitted values
    est$residuals <- (est$y - mu) / sqrt(mu * (1 - mu))
    est$mu <- mu
    est$x <- x
    est$y <- y
    est$data <- data
    #hat <- (t(mu))^(1/2)%*%x%*%(t(x)%*%mu%*%x)^(-1)%*%t(x)%*%mu^(1/2)
    #est$stdresiduals <- est$residuals/(sqrt(1-hat))

    VX <- diag(sqrt(mu*(1-mu)))
    hat <- H <- VX%*%solve(crossprod(VX,VX),t(VX))
    est$stdresiduals <- est$residuals/(sqrt(1-hat))
    class(est) <- "logReg"
    # defining the class
    est
  }
}

predict.logReg <- function(object, newdata=NULL, factorName = NULL, ...) {
  ranks <- length(unique(object$data[, factorName]))
  newdata[, factorName] <- factor(newdata[, factorName], levels = c(1:ranks))
  xNames <- delete.response(terms(object$formula))
  xMatrix <- model.matrix(xNames, newdata)
  (exp(xMatrix %*% object$coefficients))/(1 + (exp(xMatrix %*% object$coefficients)))
}

mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
mydata2 <- mydata[with(mydata, order(mydata$gre)),]
points <- (predict.logReg(logReg(admit~ mydata2$gre, data = mydata2)))
plot(mydata2$gre, points[,1])
lines(x = mydata2$gre, y = points[,1])

我得到的是这个情节:

enter image description here

当我尝试编写绘图函数时,出现错误:

plot.logReg <- function(object, varName, ...)
{

  keep_loop = TRUE
  while (keep_loop) {
    switch (menu(c("QQ plot for residuals of logistic regression",
                   "Logistic regression model fit", "exit"),
                 title = "Which plot?"),
            1 == {
              qqnorm(object$residuals,
                     main = "Normal QQ-Plot of Model Residuals",
                     ylab = "Residuals")
              # plot residuals against the theoretical quantils of
              # the standard normal distribution and give the plot the titel
              # "Normal QQ-Plot of Model Residuals"
              qqline(object$residuals)
              # add the line through the origin

            },
            2 == {

              mydata2 <- object$data[with(object$data, order(object$data[,"varName"])),]
              points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
              plot(mydata2[,"varName"], points[,1], xlab = "variable", ylab = "Probabilities")
              lines(x = mydata2[,"varName"], y = points[,1])
            },
            3 == {
              keep_loop = FALSE
            })
  }

}

log <- logReg(admit ~ gre + gpa + rank, mydata)
plot(log, varName = gre)

为什么plot.logReg函数不起作用?非常感谢您的帮助。谢谢! 编辑:第二次尝试:

2 == {

              mydata2 <- object$data[with(object$data, order(object$data[,varName])),]
              points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
              plot(mydata2[,varName], points[,1], xlab = varName, ylab = "Probabilities")
              lines(x = mydata2[,varName], y = points[,1])
            },
            3 == {
              keep_loop = FALSE
            })
  }

}

plot(log, varName = "gre")

第三次尝试:

 2 == {

              mydata2 <- object$data[with(object$data, order(object$data %>% pull(varName))),]
              points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
              plot(mydata2 %>% pull(varName), points[,1], xlab = "varName", ylab = "Probabilities")
              lines(x = mydata2 %>% pull(varName), y = points[,1])
            },
            3 == {
              keep_loop = FALSE
            })
  }

}

plot(log, varName = gre)

1 个答案:

答案 0 :(得分:0)

@RuiBarrades和@LAP感谢您的回答。我已经尝试过事先更改报价,但是没有用。例如,我尝试过:

plot.logReg <- function(object, varName, ...)
{

  keep_loop = TRUE
  while (keep_loop) {
    switch (menu(c("QQ plot for residuals of logistic regression",
                   "Logistic regression model fit", "exit"),
                 title = "Which plot?"),
            1 == {
              qqnorm(object$residuals,
                     main = "Normal QQ-Plot of Model Residuals",
                     ylab = "Residuals")
              # plot residuals against the theoretical quantils of
              # the standard normal distribution and give the plot the titel
              # "Normal QQ-Plot of Model Residuals"
              qqline(object$residuals)
              # add the line through the origin

            },
            2 == {

              mydata2 <- object$data[with(object$data, order(object$data[,varName])),]
              points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
              plot(mydata2[,varName], points[,1], xlab = varName, ylab = "Probabilities")
              lines(x = mydata2[,varName], y = points[,1])
            },
            3 == {
              keep_loop = FALSE
            })
  }

}

plot(log, varName = "gre")

如果这样做,我会收到错误消息:找不到对象gre。

您知道我该如何解决吗?再次感谢您的回答。