R:将fisher.test纳入Hmisc的摘要M会导致错误

时间:2018-06-19 03:16:03

标签: r debugging hmisc

catTestfisher <- 
  function (tab) 
  {
    st <- if (!is.matrix(tab) || nrow(tab) < 2 | ncol(tab) < 
              2) 
      list(p.value = NA, statistic = NA, parameter = NA)
    else {
      rowcounts <- tab %*% rep(1, ncol(tab))
      tab <- tab[rowcounts > 0, ]
      if (!is.matrix(tab)) 
        list(p.value = NA, statistic = NA, parameter = NA)
      else fisher.test(tab)
    }
    list(P = st$p.value, stat = "", df = "", 
         testname = "Fisher's Exact", statname = "", latexstat = "", namefun = "", 
         plotmathstat = "")
  }

我想使用library(Hmisc) summaryM函数,但使用Fisher的精确测试,所以我写了一个catTestfisher函数并设置catTest = catTestfisher在我自己的summaryM2函数中,除summaryM

外,与catTest = catTestfisher完全相同
summaryM2 <- 
  function (formula, groups = NULL, data = NULL, subset, na.action = na.retain, 
            overall = FALSE, continuous = 10, na.include = FALSE, quant = c(0.025, 
                                                                            0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 
                                                                            0.975), nmin = 100, test = FALSE, conTest = conTestkw, 
            catTest = catTestfisher, ordTest = ordTestpo) 
  {
    marg <- length(data) && ".marginal." %in% names(data)
    if (marg) 
      formula <- update(formula, . ~ . + .marginal.)
    formula <- Formula(formula)
    Y <- if (!missing(subset) && length(subset)) 
      model.frame(formula, data = data, subset = subset, na.action = na.action)
    else model.frame(formula, data = data, na.action = na.action)
    X <- model.part(formula, data = Y, rhs = 1)
    Y <- model.part(formula, data = Y, lhs = 1)
    getlab <- function(x, default) {
      lab <- attr(x, "label")
      if (!length(lab) || lab == "") 
        default
      else lab
    }
    if (marg) {
      xm <- X$.marginal.
      X$.marginal. <- NULL
    }
    else xm <- rep("", nrow(X))
    if (length(X)) {
      xname <- names(X)
      if (length(xname) == 1 && !length(groups)) 
        groups <- xname
      if (!length(groups) && length(xname) > 1) {
        warnings("Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.")
        groups <- xname[1]
      }
      svar <- if (length(xname) == 1) 
        factor(rep(".ALL.", nrow(X)))
      else do.call("interaction", list(X[setdiff(xname, groups)], 
                                       sep = " "))
      group <- X[[groups]]
      glabel <- getlab(group, groups)
    }
    else {
      svar <- factor(rep(".ALL.", nrow(Y)))
      group <- rep("", nrow(Y))
      groups <- group.freq <- NULL
      glabel <- ""
    }
    quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375, 
                       0.5, 0.625, 0.75, 0.875, 0.95, 0.975))
    nv <- ncol(Y)
    nameY <- names(Y)
    R <- list()
    for (strat in levels(svar)) {
      instrat <- svar == strat
      n <- integer(nv)
      type <- n
      comp <- dat <- vector("list", nv)
      names(comp) <- names(dat) <- nameY
      labels <- Units <- vector("character", nv)
      if (test) {
        testresults <- vector("list", nv)
        names(testresults) <- names(comp)
      }
      gr <- group[instrat]
      xms <- xm[instrat]
      if (all(xms != "")) 
        xms <- rep("", length(xms))
      group.freq <- table(gr)
      group.freq <- group.freq[group.freq > 0]
      if (overall) 
        group.freq <- c(group.freq, Combined = sum(group.freq))
      for (i in 1:nv) {
        w <- Y[instrat, i]
        if (length(attr(w, "label"))) 
          labels[i] <- attr(w, "label")
        if (length(attr(w, "units"))) 
          Units[i] <- attr(w, "units")
        if (!inherits(w, "mChoice")) {
          if (!is.factor(w) && !is.logical(w) && length(unique(w[!is.na(w)])) < 
              continuous) 
            w <- as.factor(w)
          s <- !is.na(w)
          if (na.include && !all(s) && length(levels(w))) {
            w <- na.include(w)
            levels(w)[is.na(levels(w))] <- "NA"
            s <- rep(TRUE, length(s))
          }
          n[i] <- sum(s & xms == "")
          w <- w[s]
          g <- gr[s, drop = TRUE]
          if (is.factor(w) || is.logical(w)) {
            tab <- table(w, g)
            if (test) {
              if (is.ordered(w)) 
                testresults[[i]] <- ordTest(g, w)
              else testresults[[i]] <- catTest(tab)
            }
            if (nrow(tab) == 1) {
              b <- casefold(dimnames(tab)[[1]], upper = TRUE)
              pres <- c("1", "Y", "YES", "PRESENT")
              abse <- c("0", "N", "NO", "ABSENT")
              jj <- match(b, pres, nomatch = 0)
              if (jj > 0) 
                bc <- abse[jj]
              else {
                jj <- match(b, abse, nomatch = 0)
                if (jj > 0) 
                  bc <- pres[jj]
              }
              if (jj) {
                tab <- rbind(tab, rep(0, ncol(tab)))
                dimnames(tab)[[1]][2] <- bc
              }
            }
            if (overall) 
              tab <- cbind(tab, Combined = apply(tab, 1, 
                                                 sum))
            comp[[i]] <- tab
            type[i] <- 1
          }
          else {
            sfn <- function(x, quant) {
              o <- options(digits = 10)
              on.exit(options(o))
              c(quantile(x, quant), Mean = mean(x), SD = sqrt(var(x)), 
                N = sum(!is.na(x)))
            }
            qu <- tapply(w, g, sfn, simplify = TRUE, quants)
            if (test) 
              testresults[[i]] <- conTest(g, w)
            if (overall) 
              qu$Combined <- sfn(w, quants)
            comp[[i]] <- matrix(unlist(qu), ncol = length(quants) + 
                                  3, byrow = TRUE, dimnames = list(names(qu), 
                                                                   c(format(quants), "Mean", "SD", "N")))
            if (any(group.freq <= nmin)) 
              dat[[i]] <- lapply(split(w, g), nmin = nmin, 
                                 function(x, nmin) if (length(x) <= nmin) 
                                   x
                                 else NULL)
            type[i] <- 2
          }
        }
        else {
          w <- as.numeric(w) == 1
          n[i] <- sum(!is.na(apply(w, 1, sum)) & xms == 
                        "")
          g <- as.factor(gr)
          ncat <- ncol(w)
          tab <- matrix(NA, nrow = ncat, ncol = length(levels(g)), 
                        dimnames = list(dimnames(w)[[2]], levels(g)))
          if (test) {
            pval <- numeric(ncat)
            names(pval) <- dimnames(w)[[2]]
            d.f. <- stat <- pval
          }
          for (j in 1:ncat) {
            tab[j, ] <- tapply(w[, j], g, sum, simplify = TRUE, 
                               na.rm = TRUE)
            if (test) {
              tabj <- rbind(table(g) - tab[j, ], tab[j, 
                                                     ])
              st <- catTest(tabj)
              pval[j] <- st$P
              stat[j] <- st$stat
              d.f.[j] <- st$df
            }
          }
          if (test) 
            testresults[[i]] <- list(P = pval, stat = stat, 
                                     df = d.f., testname = st$testname, statname = st$statname, 
                                     latexstat = st$latexstat, plotmathstat = st$plotmathstat)
          if (overall) 
            tab <- cbind(tab, Combined = apply(tab, 1, 
                                               sum))
          comp[[i]] <- tab
          type[i] <- 3
        }
      }
      labels <- ifelse(nchar(labels), labels, names(comp))
      R[[strat]] <- list(stats = comp, type = type, group.freq = group.freq, 
                         labels = labels, units = Units, quant = quant, data = dat, 
                         N = sum(!is.na(gr) & xms == ""), n = n, testresults = if (test) testresults)
    }
    structure(list(results = R, group.name = groups, group.label = glabel, 
                   call = call, formula = formula), class = "summaryM")
  }

在尝试对以下数据进行测试后,我收到警告并显示错误:

library(Hmisc)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
> summaryM2(sex ~ treatment, test=TRUE, overall = TRUE)
Error in round(teststat, 2) : 
  non-numeric argument to mathematical function

我尝试逐行逐步执行summaryM2功能,但无法弄清楚导致问题的原因。

1 个答案:

答案 0 :(得分:0)

catTestfisher函数中,输出变量stat(测试统计量)和df(自由度)应该是数字变量,而不是空字符串。在编程中,stat被掩盖到teststat以便在输出之前进行四舍五入(因此round("", 2)的错误消息是non-numeric argument to mathematical function)。参见summary.formula code)中的第1718至1721行。

您可以设置df = NULL,但是stat需要一个值(不是NANULL),否则不返回任何输出。您可以通过设置stat = 0(或其他任何数字)来解决该问题,然后仅使用prtest = "P"显示p值。

catTestfisher2 <- function (tab) 
{
  st <- fisher.test(tab)

  list(P = st$p.value, stat = 0, df = NULL, 
       testname = st$method, statname = "", latexstat = "", namefun = "", 
       plotmathstat = "")
}

output <- summaryM(sex ~ treatment, test=TRUE, overall = TRUE, catTest = catTestfisher2)

print(output, prtest = "P")

Descriptive Statistics  (N=500)

+-------+-----------+-----------+-----------+-------+
|       |Drug       |Placebo    |Combined   |P-value|
|       |(N=257)    |(N=243)    |(N=500)    |       |
+-------+-----------+-----------+-----------+-------+
|sex : m|0.52  (133)|0.52  (126)|0.52  (259)|     1 |
+-------+-----------+-----------+-----------+-------+

请注意,无需定义您自己的summaryM2函数。只需使用catTest =即可传递函数。