我正在尝试构建一个包含基线数据的表。我有一个数据框,包含6个不同轮次的5000人的测量值。每轮我有大约1000次测量。列根据变量和圆形命名,例如cylr1
cylr2
cylr3
。我有连续的,分类的和二分的数据。我需要这些数据的平均值/标准差或每组的数量和百分比,按照兴趣类别和测量轮次进行划分。
我设法通过在函数内部编写一组循环来实现这一点,允许我创建我感兴趣的任何变量所需的表。但是,它是一个非常大的功能,需要一些时间来处理。下面的示例适用于mtcars
,以显示结构的外观。我主要使用grep
来获取colnames,无论它是什么回合,并循环遍历不同的回合和级别。有没有更有效的方法来做到这一点?
data(mtcars)
mtcars$am <- mtcars$am + 1
mtcarsr1 <- mtcars
colnames(mtcarsr1) <- paste0(colnames(mtcarsr1),"r1")
mtcarsr1 <- subset(mtcarsr1, select = -mpgr1)
mtcarsr2 <- mtcars
colnames(mtcarsr2) <- paste0(colnames(mtcarsr2),"r2")
mtcarsr3 <- mtcars
colnames(mtcarsr3) <- paste0(colnames(mtcarsr3),"r3")
vec <- c(rep(1,8), rep(2,8), rep(3,8), rep(4,8))
mtcarsrounds <- data.frame(vec, mtcarsr1, mtcarsr2, mtcarsr3)
# variable: category to sort by
# niveaus: number of levels within category
baseline.cars <- function(variable, niveaus){
# Make separate dataframe per level of the category of intereset
base <- list()
for (i in 1:niveaus){
base.loop <- mtcarsrounds[variable == i,]
base.loop <- base.loop[grep("carb|disp|drat|vs|mpg", colnames(base.loop))]
base[[i]] <- base.loop
}
# For all categories, place data from rounds in separate dataframe
base.ronde <- list()
base.base <- list()
for (j in 1:niveaus){
for (t in 1:3){
base.loop <- base[[j]][grep(paste0("r",t), colnames(base[[j]]))]
base.ronde[[t]] <- base.loop
base.base[[j]] <- base.ronde
}
}
# Perform necessary counts and calculations per variable for baseline characteristics
# tryCatch{} functions ignore errors when variable isn't measured in certain rounds
# if{} functions convert NULL values caused by tryCatch{} to NA, so they can be included in dataframes
# if{} wrapper around whole loop is to ignore errors caused by empty categories
base.loop <- list()
base.desc <- list()
for (j in 1:niveaus){
for (t in 1:3){
if (nrow(base.base[[j]][[t]]) > 0){
carb1p <- round(sum(!is.na(base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))]) & base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))] == 1)/
length(!is.na(base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))])),2)
carb1 <- sum(!is.na(base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))]) & base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))] == 1)
disp.mean <- round(sapply(base.base[[j]][[t]][grep("disp", colnames(base.base[[j]][[t]]))], mean, na.rm = TRUE),1)
disp.sd <- round(sapply(base.base[[j]][[t]][grep("disp", colnames(base.base[[j]][[t]]))], sd, na.rm = TRUE),1)
mpg.mean <- tryCatch({mpg.mean <- round(sapply(base.base[[j]][[t]][grep("mpg", colnames(base.base[[j]][[t]]))],mean, na.rm = TRUE),1) }, error = function (e){})
mpg.sd <- tryCatch({mpg.sd <- round(sapply(base.base[[j]][[t]]
[grep("mpg", colnames(base.base[[j]][[t]]))], sd, na.rm =
TRUE),1)}, error = function (e){})
if (is.null(mpg.mean)) {mpg.mean <- NA}
if (is.null(mpg.sd)) {mpg.sd <- NA}
base.loop[[t]] <- data.frame(carb1, carb1p, carb2, carb2p,
disp.mean, disp.sd, mpg.mean, mpg.sd)
}
}
base.desc[[j]] <- base.loop
}
# Combine dataframes from separate rounds together
for (i in 1:length(base.desc)){
base.desc[[i]] <- rbind(base.desc[[i]][[1]], base.desc[[i]][[2]],
base.desc[[i]][[3]])
rownames(base.desc[[i]]) <- c("Ronde 1","Ronde 2","Ronde 3")
}
print(base.desc)
}
# Run the function
baseline.cars(mtcarsrounds$vec, 4)
编辑:将其缩短