制作带循环的基线表,有更好的方法吗?

时间:2018-04-12 08:34:58

标签: r loops grep

我正在尝试构建一个包含基线数据的表。我有一个数据框,包含6个不同轮次的5000人的测量值。每轮我有大约1000次测量。列根据变量和圆形命名,例如cylr1 cylr2 cylr3。我有连续的,分类的和二分的数据。我需要这些数据的平均值/标准差或每组的数量和百分比,按照兴趣类别和测量轮次进行划分。

我设法通过在函数内部编写一组循环来实现这一点,允许我创建我感兴趣的任何变量所需的表。但是,它是一个非常大的功能,需要一些时间来处理。下面的示例适用于mtcars,以显示结构的外观。我主要使用grep来获取colnames,无论它是什么回合,并循环遍历不同的回合和级别。有没有更有效的方法来做到这一点?

data(mtcars)
mtcars$am <- mtcars$am + 1
mtcarsr1 <- mtcars
colnames(mtcarsr1) <- paste0(colnames(mtcarsr1),"r1")
mtcarsr1 <- subset(mtcarsr1, select = -mpgr1)
mtcarsr2 <- mtcars
colnames(mtcarsr2) <- paste0(colnames(mtcarsr2),"r2")
mtcarsr3 <- mtcars
colnames(mtcarsr3) <- paste0(colnames(mtcarsr3),"r3")
vec <- c(rep(1,8), rep(2,8), rep(3,8), rep(4,8))
mtcarsrounds <- data.frame(vec, mtcarsr1, mtcarsr2, mtcarsr3)


# variable: category to sort by
# niveaus: number of levels within category
baseline.cars <- function(variable, niveaus){
  # Make separate dataframe per level of the category of intereset
  base <- list()
  for (i in 1:niveaus){
    base.loop <- mtcarsrounds[variable == i,]
    base.loop <- base.loop[grep("carb|disp|drat|vs|mpg", colnames(base.loop))]
    base[[i]] <- base.loop
  }

  # For all categories, place data from rounds in separate dataframe
  base.ronde <- list()
  base.base <- list()
  for (j in 1:niveaus){
    for (t in 1:3){
      base.loop <- base[[j]][grep(paste0("r",t), colnames(base[[j]]))]
      base.ronde[[t]] <- base.loop
      base.base[[j]] <- base.ronde
    }
  } 


  # Perform necessary counts and calculations per variable for baseline characteristics
  # tryCatch{} functions ignore errors when variable isn't measured in certain rounds
  # if{} functions convert NULL values caused by tryCatch{} to NA, so they can be included in dataframes
  # if{} wrapper around whole loop is to ignore errors caused by empty categories
  base.loop <- list()
  base.desc <- list()
  for (j in 1:niveaus){
    for (t in 1:3){
      if (nrow(base.base[[j]][[t]]) > 0){
        carb1p <- round(sum(!is.na(base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))]) & base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))] == 1)/
                          length(!is.na(base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))])),2)            
        carb1 <- sum(!is.na(base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))]) & base.base[[j]][[t]][grep("carb", colnames(base.base[[j]][[t]]))] == 1)
        disp.mean <- round(sapply(base.base[[j]][[t]][grep("disp", colnames(base.base[[j]][[t]]))], mean, na.rm = TRUE),1)
        disp.sd <- round(sapply(base.base[[j]][[t]][grep("disp", colnames(base.base[[j]][[t]]))], sd, na.rm = TRUE),1)
        mpg.mean <- tryCatch({mpg.mean <- round(sapply(base.base[[j]][[t]][grep("mpg", colnames(base.base[[j]][[t]]))],mean, na.rm = TRUE),1) }, error = function (e){})
        mpg.sd <- tryCatch({mpg.sd <- round(sapply(base.base[[j]][[t]] 
                 [grep("mpg", colnames(base.base[[j]][[t]]))], sd, na.rm = 
                 TRUE),1)}, error = function (e){})
        if (is.null(mpg.mean)) {mpg.mean <- NA}
        if (is.null(mpg.sd)) {mpg.sd <- NA}
        base.loop[[t]] <- data.frame(carb1, carb1p, carb2, carb2p,
                 disp.mean, disp.sd, mpg.mean, mpg.sd)
          }
        }
        base.desc[[j]] <- base.loop
      }

      # Combine dataframes from separate rounds together
      for (i in 1:length(base.desc)){
        base.desc[[i]] <- rbind(base.desc[[i]][[1]], base.desc[[i]][[2]], 
        base.desc[[i]][[3]])
        rownames(base.desc[[i]]) <- c("Ronde 1","Ronde 2","Ronde 3")
      }

      print(base.desc)
    }

    # Run the function
    baseline.cars(mtcarsrounds$vec, 4)

编辑:将其缩短

0 个答案:

没有答案