我正在尝试生成汇总表:
# Load the example data and the arsenal package, then summarise mpg and wt
# grouped by am.
data(mtcars)
library(arsenal)
# Print the raw data for reference.
mtcars
a <- tableby(am ~ mpg + wt, data = mtcars)
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
summary(a, text = TRUE)
现在，我想再添加一行 sum(disp)（该行不需要 p 值）。我查看过其他几个软件包，例如 qwraps2、table1、amisc 等，但都没有成功。我已经用 dplyr 的 group_by 和 summarize 计算出了这些值，但希望借助某个汇总表软件包，把它们与其他结果一并放进同一张表中。
更新：我尝试了另一种方法，结果与我想要的非常接近——只需新建一个变量，用来保存每个子组的 sum() 值即可。
# dplyr supplies %>%, group_by(), mutate() and ungroup() used below; without
# attaching it this snippet fails in a fresh session.
library(dplyr)

# Attach the per-cyl sum of disp to every row as disp_sum.
by_cyl <- mtcars %>%
  group_by(cyl) %>%
  mutate(disp_sum = sum(disp)) %>%
  ungroup()

# Options passed to tableby(): only the "meansd" statistic for numeric
# variables, with custom display labels for the statistics.
my_controls <- tableby.control(
  total = FALSE, text = TRUE,
  numeric.simplify = TRUE,
  numeric.stats = c("meansd"),
  stats.labels = list(
    meansd = "Mean",
    countpct = "N (%)"
  )
)

# disp_sum is wrapped in notest() because that row should not get a p-value.
tab1 <- tableby(cyl ~ wt + am + notest(disp_sum),
  data = by_cyl, control = my_controls
)
summary(tab1, text = TRUE)
答案 0 :(得分:0)
关键：使用数据框（data frame）。
# Install the required packages only when they are missing, so re-running the
# script does not reinstall them every time.
for (pkg in c('arsenal', 'stringr', 'knitr')) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(arsenal)
library(stringr)
library(knitr)
data(mtcars)
# Strongly prefer fixed over scientific notation when numbers become text.
options(scipen = 999)
# Summary table of mpg and wt grouped by am.
a <- tableby(am ~ mpg + wt, data = mtcars)
# Build one extra "Sum" row with 12 values, matching the columns of
# as.data.frame(a): the sum of `var_name` for am == 0, for am == 1, and over
# all rows.
sum_row <- function(var_name) {
  c('am', 'am', NA, var_name, 'sum', 'Sum', 'numeric',
    sum(mtcars[mtcars$am == 0, var_name]),
    sum(mtcars[mtcars$am == 1, var_name]),
    sum(mtcars[, var_name]), 'Linear Model ANOVA', NA)
}
saved_table <- rbind(as.data.frame(a), sum_row('mpg'), sum_row('wt'))
# Rows whose Total cell is empty create inconsistencies in the steps below,
# so keep only rows with values; the empty rows are added back at the end of
# the script.
# Columns 4, 6, 8, 9, 10 and 12 are the ones referenced later as variable,
# label, 0, 1, Total and p.value.
has_values <- !(saved_table$Total == '')
saved_table <- saved_table[has_values, c(4, 6, 8, 9, 10, 12)]
# Strip the characters "c", "(" and ")" from the three value columns.
for (value_col in c('0', '1', 'Total')) {
  saved_table[, value_col] <- gsub('[c()]', '', saved_table[, value_col])
}
# Format p-values for display: "< .001" for values below .001, otherwise the
# value rounded to three decimals with the zero before the point removed.
# The threshold is tested on the unrounded value: testing after rounding
# would show a p-value such as 0.0007 as ".001" instead of "< .001".
p_raw <- as.numeric(saved_table$p.value)
p_text <- as.character(round(p_raw, 3))
p_text[!is.na(p_raw) & p_raw < .001] <- '< .001'
# Remove the zero before the point in p-values
saved_table$p.value <- sub('^(-)?0[.]', '\\1.', p_text)
# Reformat every 'Mean (SD)' cell from "mean, sd" text into "mean (sd)", with
# both numbers rounded to two decimals. One loop handles every variable and
# group column instead of a hand-copied statement per combination.
for (var_name in c('mpg', 'wt')) {
  meansd_rows <- saved_table$variable == var_name &
    saved_table$label == 'Mean (SD)'
  for (group_col in c('0', '1', 'Total')) {
    cell_text <- saved_table[meansd_rows, group_col]
    # Mean: the characters before the comma.
    mean_value <- round(as.numeric(sub("\\,.*", "", cell_text)), 2)
    # SD: the characters after the ", " separator.
    sd_value <- round(as.numeric(gsub(".*, ", "", cell_text)), 2)
    # Put the mean and SD back into the table.
    saved_table[meansd_rows, group_col] <-
      paste0(mean_value, " (", sd_value, ")")
  }
}
# Reformat every 'Range' cell from "min, max" text into "min - max", with
# both numbers rounded to two decimals. One loop handles every variable and
# group column instead of a hand-copied statement per combination.
for (var_name in c('mpg', 'wt')) {
  range_rows <- saved_table$variable == var_name &
    saved_table$label == 'Range'
  for (group_col in c('0', '1', 'Total')) {
    cell_text <- saved_table[range_rows, group_col]
    # Minimum: the characters before the comma.
    min_value <- round(as.numeric(sub("\\,.*", "", cell_text)), 2)
    # Maximum: the characters after the ", " separator.
    max_value <- round(as.numeric(gsub(".*, ", "", cell_text)), 2)
    # Put the minimum and maximum back into the table.
    saved_table[range_rows, group_col] <-
      paste0(min_value, " - ", max_value)
  }
}
# Append one otherwise-empty row per variable that carries only the variable
# name and its p-value, copied from that variable's 'Mean (SD)' row.
for (var_name in c('mpg', 'wt')) {
  p_for_var <- saved_table[saved_table$variable == var_name &
                             saved_table$label == 'Mean (SD)', 'p.value']
  saved_table <- rbind(saved_table, c(var_name, NA, NA, NA, NA, p_for_var))
}
# Blank the p-value on every row that has a label, so it remains only on the
# rows appended above.
saved_table$p.value <- ifelse(!is.na(saved_table$label), NA, saved_table$p.value)
# Rename columns for display.
colnames(saved_table)[colnames(saved_table) == "variable"] <- "Variable"
colnames(saved_table)[colnames(saved_table) == "label"] <- "Measure"
colnames(saved_table)[colnames(saved_table) == "p.value"] <- "p value"
# Order rows by Variable, and within each variable as: the p-value row
# (Measure is NA), then Sum, Range, Mean (SD).
# An explicit rank replaces two passes of order(Variable, rev(Measure)):
# rev() reverses element positions rather than the sort direction, so that
# form only produced this layout for this exact set of rows.
measure_order <- c(NA, "Sum", "Range", "Mean (SD)")
row_order <- order(saved_table$Variable,
                   match(saved_table$Measure, measure_order))
saved_table <- saved_table[row_order, ]
# Render NA cells as blanks in the printed table.
options(knitr.kable.NA = '')
knitr::kable(saved_table, row.names = FALSE)
结果:
|Variable |Measure |0 |1 |Total |p value |
|:--------|:---------|:------------|:------------|:------------|:-------|
|mpg | | | | |< .001 |
|mpg |Sum |325.8 |317.1 |642.9 | |
|mpg |Range |10.4 - 24.4 |15 - 33.9 |10.4 - 33.9 | |
|mpg |Mean (SD) |17.15 (3.83) |24.39 (6.17) |20.09 (6.03) | |
|wt | | | | |< .001 |
|wt |Sum |71.609 |31.343 |102.952 | |
|wt |Range |2.46 - 5.42 |1.51 - 3.57 |1.51 - 5.42 | |
|wt |Mean (SD) |3.77 (0.78) |2.41 (0.62) |3.22 (0.98) | |