Question

我正在尝试使用 dplyr 和 purrr，以编程方式对许多变量批量进行卡方检验。过去我只是复制粘贴代码，但这既费力又容易出错。我已经写出了一个函数，可以生成我想要的输出表；但是，当我尝试配合 map() 函数使用它时，就会产生如下错误。我猜这可能是某个我没弄懂的语法问题，也许与 “covariates_list” 中带引号的字符串的处理方式有关。如能得到任何帮助，我将不胜感激。下面是带有虚拟数据集的代码，其格式与我的真实数据相同。

library(tidyverse)

# Example data: 20 subjects with a binary outcome and three binary covariates,
# all coded as the character values "y" / "n".
# Each covariate is spelled as one 20-character string (one letter per
# subject, in id order) and split into single characters.
df <- data.frame(
  id = 1:20,
  outcome = rep(c("y", "n", "y", "n"), times = c(4L, 3L, 5L, 8L)),
  covariate1 = strsplit("ynnnnynnnnynnnnynnny", "")[[1]],
  covariate2 = strsplit("yynnnyyynnnynnnynnyy", "")[[1]],
  covariate3 = strsplit("yynnnnnyynynnnnnnnnn", "")[[1]],
  stringsAsFactors = FALSE
)


### Frequency table for two columns of `dat`, with a chi-squared p-value.
###   a, b : unquoted column names in `dat`
###   dat  : data frame holding both columns
### Returns the count of every observed (b, a) combination, each count as a
### percentage of all counted rows, and the p-value of chisq.test(a, b)
### repeated on every row.
univariate_table <- function(a, b, dat) {
  covariate <- enquo(a)
  response <- enquo(b)

  # One row per observed combination of the two columns.
  freq <- count(dat, !!response, !!covariate)
  grand_total <- sum(freq$n)

  # summarise() yields a 1 x 1 table; as.numeric() reduces it to a scalar.
  p_value <- as.numeric(
    summarise(dat, chisq.test(!!covariate, !!response)$p.value)
  )

  freq <- mutate(freq, percentage = n / grand_total * 100)
  mutate(freq, chisq_pvalue = p_value)
}

### The function runs fine when called directly on a single covariate
### (column names are passed unquoted).
univariate_table(covariate1,outcome,df)

### Mapping the function over a list of covariate names fails with the error
### shown below. The list elements are character strings, so presumably the
### function ends up testing a length-one string such as "covariate1" against
### the outcome column instead of the column with that name.
#Error in summarise_impl(.data, dots) : 
#  Evaluation error: 'x' and 'y' must have the same length. 
covariates_list <- list("covariate1","covariate2","covariate3")
map(covariates_list,univariate_table,outcome,df)

Answer 1

首先，在函数内部用 suppressWarnings() 屏蔽 chisq.test() 产生的警告：

#' Frequency table of two categorical columns with a chi-squared p-value
#'
#' Counts every observed combination of `b` and `a` in `dat`, expresses each
#' count as a percentage of all counted rows, and appends the p-value of
#' `chisq.test()` computed on the two columns (the same value on every row).
#'
#' @param a Column of `dat`, given unquoted (a symbol can be supplied with
#'   `!!`).
#' @param b Column of `dat`, given unquoted.
#' @param dat Data frame containing both columns.
#' @return The `count()` result for `b` and `a` with two extra columns,
#'   `percentage` and `chisq_pvalue`.
univariate_table <- function(a, b, dat) {
  quo_a <- enquo(a)
  quo_b <- enquo(b)

  counts <- dat %>% count(!!quo_b, !!quo_a)
  total <- sum(counts$n)

  # Warnings raised by chisq.test() are deliberately silenced here.
  # summarise() yields a 1 x 1 table; as.numeric() reduces it to a scalar.
  p_value <- dat %>%
    summarise(suppressWarnings(chisq.test(!!quo_a, !!quo_b))$p.value) %>%
    as.numeric()

  # Inject the scalars with `!!` instead of referring to them by name:
  # inside mutate() a bare name is looked up among the columns first, so a
  # column that happened to share the local's name would silently be used
  # in its place.
  counts %>%
    mutate(
      percentage = n / (!!total) * 100,
      chisq_pvalue = !!p_value
    )
}

然后稍微修改一下调用函数的方式：先用 syms() 把列表中的字符串转换为符号，再在调用时用 !! 取消引用，这样函数接收到的就是列名本身，而不是字符串：

# Turn each covariate name into a symbol, then unquote it into the call so
# the function receives a bare column name rather than a string.
map(
  syms(covariates_list),
  function(covariate) univariate_table(!!covariate, outcome, df)
)
## [[1]]
## # A tibble: 4 x 5
##   outcome covariate1     n percentage chisq_pvalue
##   <chr>   <chr>      <int>      <dbl>        <dbl>
## 1 n       n              8        40.           1.
## 2 n       y              3        15.           1.
## 3 y       n              7        35.           1.
## 4 y       y              2        10.           1.
## 
## [[2]]
## # A tibble: 4 x 5
##   outcome covariate2     n percentage chisq_pvalue
##   <chr>   <chr>      <int>      <dbl>        <dbl>
## 1 n       n              6        30.           1.
## 2 n       y              5        25.           1.
## 3 y       n              5        25.           1.
## 4 y       y              4        20.           1.
## 
## [[3]]
## # A tibble: 3 x 5
##   outcome covariate3     n percentage chisq_pvalue
##   <chr>   <chr>      <int>      <dbl>        <dbl>
## 1 n       n             11       55.0       0.0195
## 2 y       n              4       20.0       0.0195
## 3 y       y              5       25.0       0.0195

为什么在尝试运行用户自定义函数进行重复卡方分析时，purrr 的 map() 函数会报错？

1 个答案: