这是我的问题:为什么计划对比的以下过程(经典版本和自定义函数)会导致估计和 95% CI 的结果不同?请注意,我从这个 website 复制了自定义函数。
inds <- x == 0 & y >= 3 | h >= 3
as.integer(inds & !is.na(inds))
#[1] 0 0 0 0 0 0 0 0 0 0
现在,对于经典版本和自定义版本,p 和 t 的结果是相同的,正如预期的那样(至少当 equal_var = Assumed 时)。但为什么估计值和 95% 置信区间不同?
#classical version
data(mtcars)
#set Helmert contrasts
cyl2<-c(-1,1,0)
cyl1<-c(-1,-1,2)
mtcars$cyl<-factor(mtcars$cyl)
contrasts(mtcars$cyl) <-cbind(c1,c2)
classical<-summary.lm(aov(disp~cyl, mtcars))
#custom function (I want to use it because it includes results for equal AND unequal variances --> if the custom function is correct, results for equal variances should be the same as in the classical example):
oneway <- function(dv, group, contrast, alpha = .05) {
# -- arguments --
# dv: vector of measurements (i.e., dependent variable)
# group: vector that identifies which group the dv measurement came from
# contrast: list of named contrasts
# alpha: alpha level for 1 - alpha confidence level
# -- output --
# computes confidence interval and test statistic for a linear contrast of population means in a between-subjects design
# returns a data.frame object
# estimate (est), standard error (se), t-statistic (z), degrees of freedom (df), two-tailed p-value (p), and lower (lwr) and upper (upr) confidence limits at requested 1 - alpha confidence level
# first line reports test statistics that assume variances are equal
# second line reports test statistics that do not assume variances are equal
# means, standard deviations, and sample sizes
ms <- by(dv, group, mean, na.rm = TRUE)
vars <- by(dv, group, var, na.rm = TRUE)
ns <- by(dv, group, function(x) sum(!is.na(x)))
# convert list of contrasts to a matrix of named contrasts by row
contrast <- matrix(unlist(contrast), nrow = length(contrast), byrow = TRUE, dimnames = list(names(contrast), NULL))
# contrast estimate
est <- contrast %*% ms
# welch test statistic
se_welch <- sqrt(contrast^2 %*% (vars / ns))
t_welch <- est / se_welch
# classic test statistic
mse <- anova(lm(dv ~ factor(group)))$"Mean Sq"[2]
se_classic <- sqrt(mse * (contrast^2 %*% (1 / ns)))
t_classic <- est / se_classic
# if dimensions of contrast are NULL, nummer of contrasts = 1, if not, nummer of contrasts = dimensions of contrast
num_contrast <- ifelse(is.null(dim(contrast)), 1, dim(contrast)[1])
df_welch <- rep(0, num_contrast)
df_classic <- rep(0, num_contrast)
# makes rows of contrasts if contrast dimensions aren't NULL
if(is.null(dim(contrast))) contrast <- t(as.matrix(contrast))
# calculating degrees of freedom for welch and classic
for(i in 1:num_contrast) {
df_classic[i] <- sum(ns) - length(ns)
df_welch[i] <- sum(contrast[i, ]^2 * vars / ns)^2 / sum((contrast[i, ]^2 * vars / ns)^2 / (ns - 1))
}
# p-values
p_welch <- 2 * (1 - pt(abs(t_welch), df_welch))
p_classic <- 2 * (1 - pt(abs(t_classic), df_classic))
# 95% confidence intervals
lwr_welch <- est - se_welch * qt(p = 1 - (alpha / 2), df = df_welch)
upr_welch <- est + se_welch * qt(p = 1 - (alpha / 2), df = df_welch)
lwr_classic <- est - se_classic * qt(p = 1 - (alpha / 2), df = df_classic)
upr_classic <- est + se_classic * qt(p = 1 - (alpha / 2), df = df_classic)
# output
data.frame(contrast = rep(rownames(contrast), times = 2),
equal_var = rep(c("Assumed", "Not Assumed"), each = num_contrast),
est = rep(est, times = 2),
se = c(se_classic, se_welch),
t = c(t_classic, t_welch),
df = c(df_classic, df_welch),
p = c(p_classic, p_welch),
lwr = c(lwr_classic, lwr_welch),
upr = c(upr_classic, upr_welch))
}
#results for mtcars with and without Welch correction:
custom<-(with(mtcars,
oneway(dv = disp, group= cyl, contrast = list (cyl1=c(-1,-1,2), cyl2 =c(-1,1,0)))))
PS:这是我解决这个问题的最佳尝试。或者,我很乐意提供有关如何在 R 中获得 Welch 校正对比的估计值和 95% CI 的任何想法,而无需依赖博客中的自定义函数。