在R中为tbl_summary创建一个函数

时间:2020-11-03 07:23:17

标签: r function gtsummary

我有一个如下所示的示例数据集,以及用来生成所需表格的代码。但是,我还有很多变量要添加到表中。如果对每个变量都重复同样的代码来建表,代码会变得非常冗长。我尝试把 tbl_summary 的调用封装成一个函数,但它似乎不起作用,我也不知道该如何解决这个问题。

library(gtsummary)
library(tidyverse)

# Example data: ten rows with a gender, a dietary-assessment source, total
# energy in kJ and a vegetable score.
test <- data.frame(
  Gender = c(
    "Female", "Male", "Male", "Female", "Female",
    "Female", "Male", "Female", "Female", "Male"
  ),
  source = c(
    "FFQ", "Foodworks", "FFQ", "FFQ", "FFQ",
    "FFQ", "FFQ", "Foodworks", "Foodworks", "Foodworks"
  ),
  EnergyDF_kJ_total = c(
    8060.61, 16802.2, 10755.57, 8061.82, 8995.44,
    3838.91, 7495.89, 8057.92, 15831.68, 5298.25
  ),
  vegetable_score = c(6.47, 5.55, 8.39, 5.17, 10, 1.82, 3.11, 1.21, 2.76, 1.21)
)

# Header row for EnergyDF_kJ_total: a summary stratified by Gender. Its
# statistics are blanked below so that only the variable label (and the
# Gender column headers) remain when the tables are stacked.
tbl_EnergyDF_kJ_total <-
  test %>%
  select(Gender, EnergyDF_kJ_total) %>%
  tbl_summary(
    by = Gender,
    missing = "no",
    type = EnergyDF_kJ_total ~ "continuous",
    statistic = EnergyDF_kJ_total ~ "{mean} ({sd})"
  ) %>%
  modify_header(stat_by = "**{level}**") # CHANGE COLUMN HEADER

# REMOVE STATISTICS FOR EnergyDF_kJ_total FROM TABLE
# across() replaces the superseded mutate_at(vars(...)) form; both by-level
# statistic columns are overwritten with missing strings.
tbl_EnergyDF_kJ_total$table_body <-
  tbl_EnergyDF_kJ_total$table_body %>%
  mutate(across(c(stat_1, stat_2), ~NA_character_))

# Per-source tables: nest the data by `source`, summarise each nested data
# frame by Gender, then stack the resulting tables into one.
tbl_EnergyDF_kJ_total_by_source <-
  test %>%
  # only the summarised variable plus the two categorical variables are needed
  select(Gender, EnergyDF_kJ_total, source) %>%
  group_nest(source) %>%
  mutate(
    tbl = map2(
      source, data,
      function(source_level, source_rows) {
        source_rows %>%
          tbl_summary(
            by = Gender,
            type = EnergyDF_kJ_total ~ "continuous",
            statistic = EnergyDF_kJ_total ~ "{mean} ({sd})",
            # the row is labelled with the source value instead of the
            # variable name
            label = list(EnergyDF_kJ_total = source_level),
            missing = "no"
          ) %>%
          add_overall(col_label = "**Overall**") %>%
          add_n()
      }
    )
  ) %>%
  pull(tbl) %>%
  tbl_stack()


# stacking the tables
tbl_stack(list(tbl_EnergyDF_kJ_total, tbl_EnergyDF_kJ_total_by_source)) %>%
  # place N and the overall column directly after the label column
  modify_table_body(dplyr::relocate, c("n", "stat_0"), .after = "label") %>%
  as_gt() %>%
  # indenting the source rows: the header row comes from a table built without
  # add_n(), so its `n` is missing and it is left unindented.
  # The column is given by name rather than through the deprecated gt::vars().
  gt::tab_style(
    style = gt::cell_text(indent = gt::px(10), align = "left"),
    locations = gt::cells_body(columns = "label", rows = !is.na(n))
  )

以下是我为总体表编写函数时尝试的代码,但它无法正常运行。任何帮助都将不胜感激。

#' Summarise one continuous column, stratified by a grouping column
#'
#' @param test A data frame.
#' @param var1 Grouping column, passed unquoted (e.g. `Gender`).
#' @param var2 Column to summarise as continuous, passed unquoted.
#' @return The gtsummary table returned by `modify_header()`.
x <- function(test, var1, var2) {
  test %>%
    # `{{ }}` forwards the caller's column names into the tidy-select calls;
    # bare `var1` / `var2` are not resolved to the columns the caller named.
    select({{ var1 }}, {{ var2 }}) %>%
    tbl_summary(
      by = {{ var1 }},
      missing = "no",
      # After select() the only column besides `by` is the one to summarise,
      # so everything() on the formula left-hand side targets exactly `var2`.
      type = everything() ~ "continuous",
      statistic = everything() ~ "{mean} ({sd})"
    ) %>%
    modify_header(stat_by = "{level}") # CHANGE COLUMN HEADER
}

test1 <- x(test, Gender, EnergyDF_kJ_total)

1 个答案:

答案 0 :(得分:0)

下面是一个把您的建表代码封装成函数的示例。祝编程愉快!

remotes::install_github("ddsjoberg/gtsummary") # installing version 1.3.5.9007
library(gtsummary)
library(tidyverse)
packageVersion("gtsummary")

# Example data: ten rows with a gender, a dietary-assessment source, total
# energy in kJ and a vegetable score.
test <- data.frame(
  Gender = c(
    "Female", "Male", "Male", "Female", "Female",
    "Female", "Male", "Female", "Female", "Male"
  ),
  source = c(
    "FFQ", "Foodworks", "FFQ", "FFQ", "FFQ",
    "FFQ", "FFQ", "Foodworks", "Foodworks", "Foodworks"
  ),
  EnergyDF_kJ_total = c(
    8060.61, 16802.2, 10755.57, 8061.82, 8995.44,
    3838.91, 7495.89, 8057.92, 15831.68, 5298.25
  ),
  vegetable_score = c(6.47, 5.55, 8.39, 5.17, 10, 1.82, 3.11, 1.21, 2.76, 1.21)
)


#' Build a stacked summary table for a continuous variable
#'
#' The result has a header row that carries only the variable label, followed
#' by one row per value of `source` showing "{mean} ({sd})" by `Gender`,
#' together with an overall column and N.
#'
#' @param data A data frame containing the columns `Gender`, `source` and the
#'   column(s) named in `variable`.
#' @param variable Character; name of the column to summarise.
#' @return The stacked gtsummary table, with the `n` and `stat_0` columns moved
#'   directly after `label`.
my_table <- function(data, variable) {
  required_cols <- c("Gender", "source", variable)
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  data <- data[required_cols]

  # create table overall
  tbl_header_row <-
    data %>%
    select(all_of(c("Gender", variable))) %>%
    tbl_summary(
      by = Gender,
      missing = "no",
      type = everything() ~ "continuous",
      statistic = everything() ~ "{mean} ({sd})"
    ) %>%
    modify_header(stat_by = "**{level}**") # CHANGE COLUMN HEADER

  # REMOVE STATISTICS FOR variable FROM TABLE
  # Every by-level statistic column (stat_1, stat_2, ...) is blanked, so the
  # header row stays empty however many levels `Gender` has.
  tbl_header_row$table_body <-
    tbl_header_row$table_body %>%
    mutate(across(matches("^stat_\\d+$"), ~NA_character_))

  # create table stratified by source
  tbl_variable_by_source <-
    data %>%
    # the nested list-column gets its own name so it cannot be confused with
    # the `data` argument of this function
    group_nest(source, .key = "source_rows") %>%
    mutate(
      tbl = map2(
        source, source_rows,
        function(source_level, rows) {
          rows %>%
            tbl_summary(
              by = Gender,
              type = everything() ~ "continuous",
              statistic = everything() ~ "{mean} ({sd})",
              # each row is labelled with its source value
              label = everything() ~ source_level,
              missing = "no"
            ) %>%
            add_overall(col_label = "**Overall**") %>%
            add_n()
        }
      )
    ) %>%
    pull(tbl) %>%
    tbl_stack()

  # stacking the tables
  tbl_stack(list(tbl_header_row, tbl_variable_by_source)) %>%
    modify_table_body(dplyr::relocate, c("n", "stat_0"), .after = "label")
}

# building each table individually
tbl1 <- my_table(test, "EnergyDF_kJ_total")
tbl2 <- my_table(test, "vegetable_score")

# stacking all tables, and indenting rows
list(tbl1, tbl2) %>%
  tbl_stack() %>%
  as_gt() %>%
  # indenting the source rows: header rows are built without add_n(), so their
  # `n` is missing and they are left unindented.
  # The column is given by name rather than through the deprecated gt::vars().
  gt::tab_style(
    style = gt::cell_text(indent = gt::px(10), align = "left"),
    locations = gt::cells_body(columns = "label", rows = !is.na(n))
  )

(此处原为生成的结果表格截图)