如何在R

时间:2017-08-03 14:55:07

标签: r

亲爱的Stackoverflow用户, 这是一个示例表,与我的相似,只有我有超过1000蛋白质,在这里我放置了2:     `#for stack overflow#

Accession <- rep(c("AT1G01320.1", "AT1G01050.1"), each =14)
Description<- rep(c("protein1", "protein2"), each = 14)
genotype <- c("WT", "WT","WT", "WT", "m", "m", "m", "f", "f", "f", "f", "ntrc", "ntrc", "ntrc")
genotype <- c("WT", "WT","WT", "WT", "m", "m", "m", "f", "f", "f", "f", "ntrc", "ntrc", "ntrc")
variable <- c("WT1", "WT2","WT3", "WT4", "m1", "m2", "m3", "f1", "f2", "f3", "f4", "ntrc1", "ntrc2", "ntrc3", "WT1", "WT2","WT3", "WT4", "m1", "m2", "m3", "f1", "f2", "f3", "f4", "ntrc1", "ntrc2", "ntrc3")

value <- c(5535705, 8034106, 4879639, 6817736, 23109581, 3778870, 6020611, 4480108, 6131362, 4210275, 27630841, 4702864,2966520, 9065916,  151903.67, 417423.81, 2895121.80, 810620.92, 822284.83, 6477122.14, 12266704.79, 11196940.77, 12143974.82, 1040832.60, 136497.86, 9294097.54, 506386.62, 32266.71)


prot<- data.frame(Accession, Description, genotype, variable, value)
> prot
 Accession Description genotype variable       value
1  AT1G01320.1    protein1       WT      WT1  5535705.00
2  AT1G01320.1    protein1       WT      WT2  8034106.00
3  AT1G01320.1    protein1       WT      WT3  4879639.00
4  AT1G01320.1    protein1       WT      WT4  6817736.00
5  AT1G01320.1    protein1        m       m1 23109581.00
6  AT1G01320.1    protein1        m       m2  3778870.00
7  AT1G01320.1    protein1        m       m3  6020611.00
8  AT1G01320.1    protein1        f       f1  4480108.00
9  AT1G01320.1    protein1        f       f2  6131362.00
10 AT1G01320.1    protein1        f       f3  4210275.00
11 AT1G01320.1    protein1        f       f4 27630841.00
12 AT1G01320.1    protein1     ntrc    ntrc1  4702864.00
13 AT1G01320.1    protein1     ntrc    ntrc2  2966520.00
14 AT1G01320.1    protein1     ntrc    ntrc3  9065916.00
15 AT1G01050.1    protein2       WT      WT1   151903.67
16 AT1G01050.1    protein2       WT      WT2   417423.81
17 AT1G01050.1    protein2       WT      WT3  2895121.80
18 AT1G01050.1    protein2       WT      WT4   810620.92
19 AT1G01050.1    protein2        m       m1   822284.83
20 AT1G01050.1    protein2        m       m2  6477122.14
21 AT1G01050.1    protein2        m       m3 12266704.79
22 AT1G01050.1    protein2        f       f1 11196940.77
23 AT1G01050.1    protein2        f       f2 12143974.82
24 AT1G01050.1    protein2        f       f3  1040832.60
25 AT1G01050.1    protein2        f       f4   136497.86
26 AT1G01050.1    protein2     ntrc    ntrc1  9294097.54
27 AT1G01050.1    protein2     ntrc    ntrc2   506386.62
28 AT1G01050.1    protein2     ntrc    ntrc3    32266.71
> 

我想编写一个循环,首先将包含&gt; 1000条目的原始数据框子集成基于单个蛋白质ID的子集,而不是单向ANOVA和Tukeys HSD,从Tukeys获得p adj,而不是将其打印成pdf 。 到目前为止我有:

`IDs<-unique((prot$Accession))
 tukey_fullAA <- list()
 table_fullAA <- NULL

 for (i in 1:length(IDs)){
 temp <- prot[(prot$Accession)==IDs[i],]
 AV<- summary(aov(temp$value ~ temp$genotype))
 tukey_fullAA <- list(TukeyHSD(aov(temp$value ~ temp$genotype)))
 }

 for(j in 1:length(tukey_fullAA))## important loop over whole list
 {
 tukey <- tukey_fullAA[[j]]
 factor_table <- unlist(lapply(tukey, function(x) nrow(x)))
 factor_table <- rep(names(factor_table), factor_table)
 tukey_bound <- NULL
 for (k in 1:length(tukey))
 {
 tukey_bound <- rbind(tukey_bound, tukey[[k]])
 }
 pairs <- rownames(tukey_bound)
 rownames(tukey_bound) <- NULL
 tukey_bound <- as.data.frame(tukey_bound)
 tukey_bound$parameter <- factor_table
 tukey_bound$pairs <- pairs
 table_fullAA <- rbind(table_fullAA, tukey_bound)
 }

因为现在它没有循环,我努力将Tukey HSD放到表中,当我拥有它时我想找到重要的值p adj,我也很困惑添加一个可以说的列这些价值是什么蛋白质。我把它想象成一个列的第一列,其中包含一个名称,用于输出所需的行数。 非常感谢!

0 个答案:

没有答案