Question

这是我的问题：为什么计划对比的以下过程（经典版本和自定义函数）会导致估计和 95% CI 的结果不同？请注意，我从这个 website 复制了自定义函数。

inds <- x == 0 & y >= 3 | h >= 3
as.integer(inds & !is.na(inds))
#[1] 0 0 0 0 0 0 0 0 0 0

现在，对于经典版本和自定义版本，p 和 t 的结果是相同的，正如预期的那样（至少当 equal_var = Assumed 时）。但为什么估计值和 95% 置信区间不同？

#classical version
data(mtcars)
#set Helmert contrasts
cyl2<-c(-1,1,0)
cyl1<-c(-1,-1,2)
mtcars$cyl<-factor(mtcars$cyl)
contrasts(mtcars$cyl) <-cbind(c1,c2)
classical<-summary.lm(aov(disp~cyl, mtcars))

#custom function (I want to use it because it includes results for equal AND unequal variances --> if the custom function is correct, results for equal variances should be the same as in the classical example):
oneway <- function(dv, group, contrast, alpha = .05) {
  # -- arguments --
  # dv: vector of measurements (i.e., dependent variable)
  # group: vector that identifies which group the dv measurement came from
  # contrast: list of named contrasts
  # alpha: alpha level for 1 - alpha confidence level
  # -- output --
  # computes confidence interval and test statistic for a linear contrast of population means in a between-subjects design
  # returns a data.frame object
  # estimate (est), standard error (se), t-statistic (z), degrees of freedom (df), two-tailed p-value (p), and lower (lwr) and upper (upr) confidence limits at requested 1 - alpha confidence level
  # first line reports test statistics that assume variances are equal
  # second line reports test statistics that do not assume variances are equal
  
  # means, standard deviations, and sample sizes
  ms <- by(dv, group, mean, na.rm = TRUE)
  vars <- by(dv, group, var, na.rm = TRUE)
  ns <- by(dv, group, function(x) sum(!is.na(x)))
  
  # convert list of contrasts to a matrix of named contrasts by row
  contrast <- matrix(unlist(contrast), nrow = length(contrast), byrow = TRUE, dimnames = list(names(contrast), NULL))
  
  # contrast estimate
  est <- contrast %*% ms
  
  # welch test statistic
  se_welch <- sqrt(contrast^2 %*% (vars / ns))
  t_welch <- est / se_welch
  
  # classic test statistic
  mse <- anova(lm(dv ~ factor(group)))$"Mean Sq"[2]
  se_classic <- sqrt(mse * (contrast^2 %*% (1 / ns)))
  t_classic <- est / se_classic
  
  # if dimensions of contrast are NULL, nummer of contrasts = 1, if not, nummer of contrasts = dimensions of contrast
  num_contrast <- ifelse(is.null(dim(contrast)), 1, dim(contrast)[1])
  df_welch <- rep(0, num_contrast)
  df_classic <- rep(0, num_contrast)
  
  # makes rows of contrasts if contrast dimensions aren't NULL
  if(is.null(dim(contrast))) contrast <- t(as.matrix(contrast))
  
  # calculating degrees of freedom for welch and classic
  for(i in 1:num_contrast) {
    df_classic[i] <- sum(ns) - length(ns)
    df_welch[i] <- sum(contrast[i, ]^2 * vars / ns)^2 / sum((contrast[i, ]^2 * vars / ns)^2 / (ns - 1))
  }
  
  # p-values
  p_welch <- 2 * (1 - pt(abs(t_welch), df_welch))
  p_classic <- 2 * (1 - pt(abs(t_classic), df_classic))
  
  # 95% confidence intervals
  lwr_welch <- est - se_welch * qt(p = 1 - (alpha / 2), df = df_welch)
  upr_welch <- est + se_welch * qt(p = 1 - (alpha / 2), df = df_welch)
  lwr_classic <- est - se_classic * qt(p = 1 - (alpha / 2), df = df_classic)
  upr_classic <- est + se_classic * qt(p = 1 - (alpha / 2), df = df_classic)
  
  # output
  data.frame(contrast = rep(rownames(contrast), times = 2),
             equal_var = rep(c("Assumed", "Not Assumed"), each = num_contrast),
             est = rep(est, times = 2),
             se = c(se_classic, se_welch),
             t = c(t_classic, t_welch),
             df = c(df_classic, df_welch),
             p = c(p_classic, p_welch),
             lwr = c(lwr_classic, lwr_welch),
             upr = c(upr_classic, upr_welch))
}

#results for mtcars with and without Welch correction:
custom<-(with(mtcars, 
      oneway(dv = disp, group= cyl, contrast = list (cyl1=c(-1,-1,2), cyl2 =c(-1,1,0)))))

PS：这是我解决这个问题的最佳尝试。或者，我很乐意提供有关如何在 R 中获得 Welch 校正对比的估计值和 95% CI 的任何想法，而无需依赖博客中的自定义函数。

R 中计划对比的估计值和 95% CI

0 个答案: