操纵Data.Frames

时间:2017-09-13 15:19:10

标签: r

我有多个不同的data.frame对象,每个对象都有两列(Name和Statistic)。这些data.frame对象称为Experiment1,Experiment2,Experiment3 ...... Experiment{n}

> Experiment1 Name Statistic 1 a -1.050 2 b 0.058 3 c 0.489 4 d 1.153 5 e 0.736 6 f -1.155 7 g 0.186

> Experiment2 Name Statistic 1 a 0.266 2 b 0.067 3 c -0.385 4 d 0.068 5 e 1.563 6 f 0.745 7 g 1.671

> Experiment3 Name Statistic 1 a 0.004 2 b -2.074 3 c 0.746 4 d 0.207 5 e 0.700 6 f 0.158 7 g 0.067

> Experiment4 Name Statistic 1 a 0.255 2 b -0.542 3 c 0.477 4 d 1.552 5 e 0.025 6 f 1.027 7 g 0.326

> Experiment5 Name Statistic 1 a 1.817 2 b 0.147 3 c 0.052 4 d 0.194 5 e -0.137 6 f 2.321 7 g -0.939

> Experiment6 Name Statistic 1 a 1.817 2 b 0.147 3 c 0.052 4 d 0.194 5 e -0.137 6 f 2.321 7 g -0.939

> ExperimentalDesign$metabolite [1] "butyrate" "h2s" "hippurate" "acetate" "propionate" "butyrate_2" [7] "h2s_2" "hippurate_2" "acetate_2" "propionate_2"

我有三个不同的data.frame对象。这些data.frame对象称为Experiment1,Experiment2,Experiment3 ... Experiment {n}(其中n是NumberTubes除以NumberParameters)。

现在我想把每个data.frame对象中的 $Statistic 列合并到表中(每个输出表包含3个Statistic列),例如 tab_1 <- cbind(Experiment1, Experiment2$Statistic, Experiment3$Statistic)。另外,还要按顺序从 ExperimentalDesign$metabolite 中取出对应的代谢物名称,例如 Table_3 对应的代谢物是 hippurate。

  1. NumberRepeats< - 3(表1 =合并Experiment_1, Experiment_2 $ Statistic,Experiment_3 $ Statistic,Table_2 = merge Experiment_4,Experiment_5 $ Statistic,Experiment_6 $ Statistic等。)
  2. Experiment_n< -17(例如Experiment_1,Experiment_2等等)
  3. skipTube< - c(11)(跳过Experiment_11)
  4. 期望的产出:

    TABLE_1: Experiment1 Experiment2 Experiment3 metabolite a -1.050 0.266 0.004 butyrate b 0.058 0.067 -2.074 butyrate c 0.489 -0.385 0.746 butyrate d 1.153 0.068 0.207 butyrate e 0.736 1.563 0.700 butyrate f -1.155 0.745 0.158 butyrate g 0.186 1.671 0.067 butyrate

    TABLE_2

    Experiment4 Experiment5 Experiment6 metabolite a 0.255 1.817 -0.827 h2s b -0.542 0.147 0.219 h2s c 0.477 0.052 1.561 h2s d 1.552 0.194 1.493 h2s e 0.025 -0.137 0.063 h2s f 1.027 2.321 0.844 h2s g 0.326 -0.939 -0.373 h2s

    真的很开心:

    使用这段代码,您可以将不同数据框对象的列合并到一个表中。您可以通过NumberRepeats变量控制每个表的列数。存储在列表中的所有表的数据列数都等于
    NumberRepeats变量的值,最后一个表除外(它的列数可能更少)...

    # Create test data: Experiment1 .. Experiment17, each a data.frame with a
    # Name column (letters a-g) and a random Statistic column.
    for (i in seq_len(17)) {
      Name <- letters[1:7]
      Statistic <- round(rnorm(7), 3)
      assign(paste0("Experiment", i), data.frame(Name, Statistic))
    }

    # set some parameters
    NumberRepeats <- 3   # number of Statistic columns per output table
    Experiment_n <- 17   # index of the last Experiment<i> object
    skipTube <- c(11)    # experiment indices that must be left out

    # let go
    #
    # Walk through Experiment1 .. Experiment<Experiment_n>, skipping the indices
    # in skipTube, and bind the Statistic columns into matrices of at most
    # NumberRepeats columns. Each matrix is stored in `out` and also assigned to
    # a variable named table_<k>. The last matrix may have fewer columns.
    out <- list()
    list_index <- 1
    counter <- 1
    # `<=` (not `<`): otherwise the last experiment is silently dropped whenever
    # it would start a new table (e.g. Experiment17 with the settings above).
    while (counter <= Experiment_n) {
      tab <- NULL
      nam <- NULL
      # Scalar condition, so use the short-circuiting `&&`.
      while ((is.null(tab) || ncol(tab) < NumberRepeats) &&
               counter <= Experiment_n) {
        if (!(counter %in% skipTube)) {
          tab <- cbind(tab, get(paste0("Experiment", counter))$Statistic)
          nam <- c(nam, paste0("Experiment", counter))
        }
        counter <- counter + 1
      }

      # Every remaining experiment was skipped: there is no table to store, and
      # setting dimnames on NULL would raise an error.
      if (is.null(tab)) {
        break
      }

      colnames(tab) <- nam
      rownames(tab) <- as.character(Experiment1$Name)

      out[[list_index]] <- tab
      assign(paste0("table_", list_index), tab)

      list_index <- list_index + 1
    }
    out
    

    以上代码的输出:

    Experiment1 Experiment2 Experiment3 a 0.136 0.260 -1.089
    b 0.946 -1.165 -0.599
    c -0.462 -1.445 0.044
    d -1.936 -0.391 0.622
    e 0.537 -0.502 1.192
    f 0.259 0.096 -1.873
    g 1.352 0.049 -0.644

    以上代码所需的输出:

    Experiment1 Experiment2 Experiment3 metabolite a -1.050 0.266 0.004 butyrate b 0.058 0.067 -2.074 butyrate c 0.489 -0.385 0.746 butyrate d 1.153 0.068 0.207 butyrate e 0.736 1.563 0.700 butyrate f -1.155 0.745 0.158 butyrate g 0.186 1.671 0.067 butyrate

2 个答案:

答案 0 :(得分:1)

类似下面的写法应该可行,不过这种做法比较简陋:

# Build each table as a data.frame instead of chaining cbind(): cbind() on
# numeric vectors plus a character metabolite name returns a character matrix,
# which turns every statistic into a string and labels the columns x, y, y, y.
# A data.frame keeps the statistics numeric and gives each column a real name.
table1 <- data.frame(
  Experiment1 = Experiment1$Statistic,
  Experiment2 = Experiment2$Statistic,
  Experiment3 = Experiment3$Statistic,
  metabolite = ExperimentalDesign$metabolite[1],
  row.names = as.character(Experiment1$Name),
  stringsAsFactors = FALSE
)

table2 <- data.frame(
  Experiment4 = Experiment4$Statistic,
  Experiment5 = Experiment5$Statistic,
  Experiment6 = Experiment6$Statistic,
  metabolite = ExperimentalDesign$metabolite[2],
  row.names = as.character(Experiment4$Name),
  stringsAsFactors = FALSE
)

编辑:更强大的解决方案:

首先创建一个名为ldf的所有实验数据的列表:

# Collect Experiment1 .. Experiment<n> from the workspace into one unnamed
# list, in numeric order. A literal `list(Experiment1, ..., Experimentn)` is
# only a sketch: `...` is not valid outside a function body.
n_experiments <- 17  # number of Experiment<i> objects; adjust to your data
ldf <- unname(
  mget(paste0("Experiment", seq_len(n_experiments)), inherits = TRUE)
)

然后:

# Split the experiments into consecutive groups of `chunk_size` and build one
# table per group: the Statistic column of every experiment in the group plus
# the t-th metabolite name. Returns a list of data.frames; the last one has
# fewer statistic columns when length(ldf) is not a multiple of chunk_size.
#
# t  - group index (1-based)
# l  - list of experiment data.frames, where l[[i]] is Experiment<i>
# df - object whose $metabolite vector holds one name per group
chunk_size <- 3
lapply(
  seq_len(ceiling(length(ldf) / chunk_size)),
  function(t, l, df) {
    # Clamp the upper bound to the list length. Subtracting
    # length(l) %% chunk_size from it cut the last group too short (with 17
    # experiments the final group was 16:16 and Experiment17 was lost).
    ind <- ((t - 1) * chunk_size + 1):min(t * chunk_size, length(l))
    stats <- lapply(l[ind], `[[`, "Statistic")
    names(stats) <- paste0("Experiment", ind)
    # data.frame() rather than cbind(): cbind() with the character metabolite
    # would coerce all statistics to strings.
    data.frame(stats, metabolite = df$metabolite[t], stringsAsFactors = FALSE)
  },
  ldf, ExperimentalDesign
)

答案 1 :(得分:0)

如果您希望聚合每3个表,此解决方案应该可以执行您想要的操作。

library(reshape)

# Create test data: Experiment1 .. Experiment17, each a data.frame with the
# experiment label, a Name column (letters a-g) and a random Statistic column.
for (i in seq_len(17)) {
  Name <- letters[1:7]
  Statistic <- round(rnorm(7), 3)
  ExperimentName <- rep(paste0("Experiment", i), 7)
  assign(
    paste0("Experiment", i),
    data.frame(ExperimentName, Name, Statistic, stringsAsFactors = FALSE)
  )
}

# set some parameters
# NOTE(review): NumberRepeats and skipTube are assigned here but the loop below
# does not consult them; it always groups three consecutive experiments.
NumberRepeats <- 5
Experiment_n <- 17
skipTube <- c(3, 7, 11)

# Create dummy list for the metabolites
metabolites <- c("met1", "met2", "met3", "met4", "met5")

# Every time three consecutive experiments have been passed, stack them, print
# the long table, pivot it to one row per Name with one column per experiment,
# tag it with the matching metabolite and print it. Experiments left over after
# the last complete group of three are not printed.
#
# The pivot is done in base R: the dcast() used previously is exported by
# reshape2, not by the reshape package loaded at the top, so it could not be
# found.
group_size <- 3
for (iteration in seq_len(Experiment_n)) {
  if (iteration %% group_size == 0) {
    labels <- paste0("Experiment", (iteration - group_size + 1):iteration)
    experiments <- mget(labels, inherits = TRUE)

    # unname() so rbind() keeps plain 1..n row names.
    temp_df <- do.call(rbind, unname(experiments))
    print(temp_df)

    # One row per distinct Name (sorted), one column per experiment.
    row_names <- sort(unique(temp_df$Name))
    wide <- lapply(
      experiments,
      function(e) e$Statistic[match(row_names, e$Name)]
    )
    aggregates <- data.frame(Name = row_names, wide, stringsAsFactors = FALSE)
    aggregates$metabolite <- metabolites[iteration / group_size]
    print(aggregates)
  }
}