我偶尔处理的数据有一个完全适合的"线性模型。对于我运行的每个回归,我需要提取我用summary(mymodel)$r.squared
进行的r.squared值,但是在完全拟合模型的情况下会失败(见下文)。
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary(mymodel)$r.squared #This raises a warning
0.5294
我该如何处理这些案件?基本上,我想我想做一些像
这样的事情If(mymodel is a perfect fit)
rsquared = 1
else
rsquared = summary(mymodel)$r.squared
答案 0 :(得分:3)
您可以使用tryCatch
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary(mymodel)$r.squared #This raises a warning
tryCatch(summary(mymodel)$r.squared, warning = function(w) return(1))
# [1] 1
并附加条件以捕捉特定警告
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary(mymodel)$r.squared #This raises a warning
f <- function(expr) {
tryCatch(expr,
warning = function(w) {
if (grepl('perfect fit', w))
return(1)
else return(w)
})
}
f(TRUE)
# [1] TRUE
f(sum(1:5))
# [1] 15
f(summary(mymodel)$r.squared)
# [1] 1
f(warning('this is not a fit warning'))
# <simpleWarning in doTryCatch(return(expr), name, parentenv, handler): this is not a fit warning>
答案 1 :(得分:1)
如果你想确保一切都运行完美,那么你可以稍微修改源代码(输入summary.lm以查看原始代码):
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
这是我修改它的方式。除了函数底部的位之外,所有函数都与原始summary
函数相同。
summary2 <- function (object, correlation = FALSE, symbolic.cor = FALSE,
...)
{
z <- object
p <- z$rank
rdf <- z$df.residual
if (p == 0) {
r <- z$residuals
n <- length(r)
w <- z$weights
if (is.null(w)) {
rss <- sum(r^2)
}
else {
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/rdf
ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
class(ans) <- "summary.lm"
ans$aliased <- is.na(coef(object))
ans$residuals <- r
ans$df <- c(0L, n, length(ans$aliased))
ans$coefficients <- matrix(NA, 0L, 4L)
dimnames(ans$coefficients) <- list(NULL, c("Estimate",
"Std. Error", "t value", "Pr(>|t|)"))
ans$sigma <- sqrt(resvar)
ans$r.squared <- ans$adj.r.squared <- 0
return(ans)
}
if (is.null(z$terms))
stop("invalid 'lm' object: no 'terms' component")
if (!inherits(object, "lm"))
warning("calling summary.lm(<fake-lm-object>) ...")
Qr <- qr(object)
n <- NROW(Qr$qr)
if (is.na(z$df.residual) || n - p != z$df.residual)
warning("residual degrees of freedom in object suggest this is not an \"lm\" fit")
r <- z$residuals
f <- z$fitted.values
w <- z$weights
if (is.null(w)) {
mss <- if (attr(z$terms, "intercept"))
sum((f - mean(f))^2)
else sum(f^2)
rss <- sum(r^2)
}
else {
mss <- if (attr(z$terms, "intercept")) {
m <- sum(w * f/sum(w))
sum(w * (f - m)^2)
}
else sum(w * f^2)
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/rdf
p1 <- 1L:p
R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
se <- sqrt(diag(R) * resvar)
est <- z$coefficients[Qr$pivot[p1]]
tval <- est/se
ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
ans$residuals <- r
ans$coefficients <- cbind(est, se, tval, 2 * pt(abs(tval),
rdf, lower.tail = FALSE))
dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]],
c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
ans$aliased <- is.na(coef(object))
ans$sigma <- sqrt(resvar)
ans$df <- c(p, rdf, NCOL(Qr$qr))
if (p != attr(z$terms, "intercept")) {
df.int <- if (attr(z$terms, "intercept"))
1L
else 0L
ans$r.squared <- mss/(mss + rss)
ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n -
df.int)/rdf)
ans$fstatistic <- c(value = (mss/(p - df.int))/resvar,
numdf = p - df.int, dendf = rdf)
}
else ans$r.squared <- ans$adj.r.squared <- 0
ans$cov.unscaled <- R
dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,
1)]
#below is the only change to the code
#instead of ans$r.squared <- 1 the original code had a warning
if (is.finite(resvar) && resvar < (mean(f)^2 + var(f)) *
1e-30) {
ans$r.squared <- 1 #this is practically the only change in the source code. Originally it had the warning here
}
#moved the above lower in the order of the code so as not to affect the original code
#checked it and seems to be working properly
if (correlation) {
ans$correlation <- (R * resvar)/outer(se, se)
dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
ans$symbolic.cor <- symbolic.cor
}
if (!is.null(z$na.action))
ans$na.action <- z$na.action
class(ans) <- "summary.lm"
ans
}
运行新公式并查看它现在可以正常运行而不会发出任何警告。无需其他if
或else if
条件。
> summary2(mymodel)$r.squared
[1] 1
答案 2 :(得分:0)
捕捉完美拟合的一个选择是确定残差:如果它是完美拟合,则残差之和将为零。
x = 1:5
# generate 3 sets of y values, last set is random values
y = matrix(data = c(rep(1,5),1:5,rnorm(5)), nrow = 5)
tolerance = 0.0001
r.sq = array(NA,ncol(y))
# check fit for three sets
for (i in 1:ncol(y)){
fit = lm(y[,i]~x)
# determine sum of residuals
if (sum(abs(resid(fit))) < tolerance) {
# perfect fit case
r.sq[i] = 1 } else {
# non-perfect fit case
r.sq[i] = summary(fit)$r.squared
}
}
print(r.sq)
# [1] 1.0000000 1.0000000 0.7638879