在汇总统计表中添加一行总和

时间:2019-07-18 22:23:25

标签: r summary

我正在尝试生成汇总表:

# Load the example data and the table-building package.
data(mtcars)
library(arsenal)

# Print the data set for inspection.
mtcars

# Summarise mpg and wt by transmission type (am) and print as plain text.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
a <- tableby(am ~ mpg + wt, data = mtcars)
summary(a, text = TRUE)

现在,我想再添加一行 sum(disp)(此行不需要 p 值)。我已经看过其他几个软件包,例如 qwraps2、table1、amisc……,但仍未成功。我已经能用 dplyr 的 group_by 和 summarize 计算出这些值,但我希望借助某个汇总表软件包,把它们与其他结果放在同一张表中。

更新:我尝试了另一种方法,结果与我想要的非常接近:只需新建一个变量,保存每个子组的 sum() 即可。

# dplyr supplies %>%, group_by(), mutate() and ungroup() used below;
# without it this snippet fails on the first pipe.
library(dplyr)

# Attach the per-cylinder-group sum of disp to every row as disp_sum.
by_cyl <- mtcars %>%
  group_by(cyl) %>%
  mutate(disp_sum = sum(disp)) %>%
  ungroup()

# Table settings: no Total column, text output, mean/SD as the only numeric
# statistic, and custom labels for the statistics.
# Spell out TRUE/FALSE: `T` and `F` are ordinary variables that can be
# reassigned.
my_controls <- tableby.control(
  total = FALSE, text = TRUE,
  numeric.simplify = TRUE,
  numeric.stats = c("meansd"),
  stats.labels = list(
    meansd = "Mean",
    countpct = "N (%)"
  )
)

# notest() is applied to disp_sum because no p-value is wanted for that row.
tab1 <- tableby(cyl ~ wt + am + notest(disp_sum),
  data = by_cyl, control = my_controls
)
summary(tab1, text = TRUE)

1 个答案:

答案 0 :(得分:0)

关键:使用 data frame(数据框)。

install.packages('arsenal')
install.packages('stringr')
install.packages('knitr')
library(arsenal)
library(stringr)
library(knitr)

# Load the example data set.
data(mtcars)

# Heavily penalise scientific notation
# (presumably so small values are written out in full when turned into text).
options(scipen = 999)

# Summary of mpg and wt by transmission type (am).
a <- tableby(am ~ mpg + wt, data = mtcars)

# Build one extra "Sum" row for `var_name`.
# The 12 positions are written to line up with the columns of
# as.data.frame(a); positions 8, 9 and 10 carry the sums for am == 0,
# am == 1 and all rows. c() coerces every element to character.
sum_row <- function(var_name) {
  c('am', 'am', NA, var_name, 'sum', 'Sum', 'numeric',
    sum(mtcars[mtcars$am == 0, var_name]),
    sum(mtcars[mtcars$am == 1, var_name]),
    sum(mtcars[, var_name]),
    'Linear Model ANOVA', NA)
}

# Append the sum rows for both variables below the tableby output.
saved_table <- rbind(as.data.frame(a), sum_row('mpg'), sum_row('wt'))

# Rows whose 'Total' cell is empty are dropped for now so that every
# remaining row is complete; they are re-created at the end of the script.
# Only columns 4, 6, 8, 9, 10 and 12 are kept
# (used below as variable, label, 0, 1, Total and p.value).
has_total <- !(saved_table$Total == '')
saved_table <- saved_table[has_total, c(4, 6, 8, 9, 10, 12)]

# Remove every 'c', '(' and ')' character from the three group columns
# (presumably to turn text such as "c(1, 2)" into "1, 2").
for (group_col in c('0', '1', 'Total')) {
  saved_table[, group_col] <- gsub('[c()]', '', saved_table[, group_col])
}

# Format p-values for display.
# The "< .001" decision is made on the UNROUNDED value: rounding first would
# turn e.g. 0.0006 into 0.001, which is no longer below .001 and would be
# printed as ".001" even though the true p-value is smaller.
p_numeric <- as.numeric(saved_table$p.value)

# Values below .001 become the label '< .001'; all others are rounded to
# three decimals. Missing p-values stay NA.
saved_table$p.value <- ifelse(p_numeric < .001, '< .001', round(p_numeric, 3))

# Remove the zero before the point in p-values (e.g. "0.05" -> ".05")
saved_table$p.value <- sub('^(-)?0[.]', '\\1.', saved_table$p.value)

# Tidy the "Mean (SD)" and "Range" cells of every variable and group column.
#
# Each of these cells is expected to hold two numbers separated by ", "
# (mean and SD, or minimum and maximum). Both numbers are rounded to two
# decimals and written back as "mean (SD)" or "min - max".
#
# One loop replaces four copy-pasted sections (mpg/wt x Mean (SD)/Range),
# so the extraction and rounding rules live in a single place.
for (var_name in c('mpg', 'wt')) {
  for (stat_label in c('Mean (SD)', 'Range')) {
    # Rows of the table holding this statistic for this variable.
    target_rows <- saved_table$variable == var_name &
      saved_table$label == stat_label

    for (group_col in c('0', '1', 'Total')) {
      raw_cells <- saved_table[target_rows, group_col]

      # First number: everything before the first comma.
      first_value <- round(as.numeric(sub("\\,.*", "", raw_cells)), 2)
      # Second number: everything after the last ", ".
      second_value <- round(as.numeric(gsub(".*, ", "", raw_cells)), 2)

      # Put the rounded pair back into the table in its display format.
      saved_table[target_rows, group_col] <- if (stat_label == 'Mean (SD)') {
        paste0(first_value, " (", second_value, ")")
      } else {
        paste0(first_value, " - ", second_value)
      }
    }
  }
}

# Append one otherwise-empty row per variable that carries only the p-value,
# copied from that variable's 'Mean (SD)' row.
for (var_name in c('mpg', 'wt')) {
  saved_table <- rbind(
    saved_table,
    c(var_name, NA, NA, NA, NA,
      saved_table[saved_table$variable == var_name &
                    saved_table$label == 'Mean (SD)', 'p.value'])
  )
}

# Blank the p-value on every row that has a label, so it remains only on the
# rows appended above (whose label is NA).
saved_table$p.value[!is.na(saved_table$label)] <- NA

# Give the three descriptive columns their display names in one step.
old_names <- c("variable", "label", "p.value")
new_names <- c("Variable", "Measure", "p value")
colnames(saved_table)[match(old_names, colnames(saved_table))] <- new_names

# Order rows: by Variable, with the label-less p-value row first, followed by
# the measures in descending alphabetical order (Sum, Range, Mean (SD)).
#
# The earlier `order(Variable, rev(Measure))` reversed the *positions* of the
# Measure values rather than the sort direction, and only produced this
# layout because it was executed twice on this particular row arrangement.
# The explicit keys below give the same layout deterministically.
row_order <- order(
  saved_table$Variable,
  !is.na(saved_table$Measure),   # FALSE sorts first: rows without a Measure
  -xtfrm(saved_table$Measure)    # descending Measure within each variable
)
saved_table <- saved_table[row_order, ]

# Print missing values as empty cells and render the table without row names.
options(knitr.kable.NA = '')
knitr::kable(saved_table, row.names = FALSE)

结果:

|Variable |Measure   |0            |1            |Total        |p value |
|:--------|:---------|:------------|:------------|:------------|:-------|
|mpg      |          |             |             |             |< .001  |
|mpg      |Sum       |325.8        |317.1        |642.9        |        |
|mpg      |Range     |10.4 - 24.4  |15 - 33.9    |10.4 - 33.9  |        |
|mpg      |Mean (SD) |17.15 (3.83) |24.39 (6.17) |20.09 (6.03) |        |
|wt       |          |             |             |             |< .001  |
|wt       |Sum       |71.609       |31.343       |102.952      |        |
|wt       |Range     |2.46 - 5.42  |1.51 - 3.57  |1.51 - 5.42  |        |
|wt       |Mean (SD) |3.77 (0.78)  |2.41 (0.62)  |3.22 (0.98)  |        |