计算数据框中3列的平均值

时间:2014-06-10 09:19:40

标签: r

我有3个数据框,它们只是重复。所以我想绑定它们并计算每个分数的平均值。

三个数据框:

Nr.1

> dput(head(tbl_gel1))
structure(list(Name = c("yal003w", "yal005c", "yal012w", "yal016w", 
"yal035w", "yal038w"), `1_1` = c(1.08346521189121, NA, NA, NA, 
NA, NA), `1_10` = c(0.267721905361376, 1.43303883148383, 1.61684304894131, 
NA, NA, NA), `1_11` = c(0.189487668138674, 0.75522363065885, 
1, NA, NA, NA), `1_12` = c(NA, 1.01340492119247, NA, NA, NA, 
NA), `1_13` = c(0.374782308020683, 0.945489433731933, NA, NA, 
NA, 0.0317297633029047), `1_14` = c(0.437488212634424, 1.18763709680314, 
NA, NA, NA, 0.0278039649538794), `1_15` = c(1, 0.963283876302253, 
NA, NA, NA, 0.101985769564935), `1_16` = c(0.933864874212228, 
0.534233379286527, NA, NA, NA, 0.216767470594226), `1_17` = c(1, 
0.665519263271478, NA, NA, 1, 1), `1_18` = c(0.666036574750145, 
0.570465125348879, NA, NA, NA, 1.42894349812116), `1_19` = c(0.514337131747938, 
0.23204076838128, NA, NA, 1, 1.2521214021452), `1_2` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `1_20` = c(NA, 
NA, NA, NA, NA, 1.40803677399372), `1_21` = c(1.09990599806138, 
NA, NA, NA, NA, 1.04631699593704), `1_22` = c(1.26442418472118, 
NA, NA, NA, NA, 0.928872017485782), `1_23` = c(1.11596921281805, 
NA, NA, NA, 1, 0.34698227364696), `1_24` = c(0.754496014447251, 
NA, NA, NA, 1, 0.222234793614252), `1_3` = c(6.29254185223621, 
NA, NA, 0.693642968439352, NA, NA), `1_4` = c(1.36347593974479, 
NA, NA, 1, NA, NA), `1_5` = c(0.765885344543765, NA, NA, 1, NA, 
NA), `1_6` = c(0.238118001668604, 0.679584207611477, NA, NA, 
NA, NA), `1_7` = c(0.847897771442355, 0.277348019879946, NA, 
NA, NA, NA), `1_8` = c(0.356154192700505, 1, 0.409523853881517, 
NA, NA, NA), `1_9` = c(0.180109142324181, 1, 0.578310191227172, 
NA, NA, 0.093113736249161)), .Names = c("Name", "1_1", "1_10", 
"1_11", "1_12", "1_13", "1_14", "1_15", "1_16", "1_17", "1_18", 
"1_19", "1_2", "1_20", "1_21", "1_22", "1_23", "1_24", "1_3", 
"1_4", "1_5", "1_6", "1_7", "1_8", "1_9"), row.names = c(NA, 
6L), class = "data.frame")

NR。 2

> dput(head(tbl_gel2))
structure(list(Name = c("yal003w", "yal005c", "yal012w", "yal016w", 
"yal035w", "yal038w"), `2_1` = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), `2_2` = c(1.0548947840373, NA, 
NA, NA, NA, NA), `2_3` = c(1.61794716486303, 0.346821796129205, 
NA, NA, NA, NA), `2_4` = c(1, NA, NA, 0.378254379051086, NA, 
NA), `2_5` = c(0.670710809411423, NA, NA, 1, NA, NA), `2_6` = c(0.313872585645673, 
NA, NA, NA, NA, NA), `2_7` = c(0.299293639466945, 0.13920907824675, 
NA, NA, NA, NA), `2_8` = c(0.311431376422469, 0.511742245543671, 
0.342807141055383, NA, NA, NA), `2_9` = c(0.243672215177189, 
1, 0.689138745271004, NA, NA, 0.0540861571772987), `2_10` = c(0.154732102234279, 
1.08973258347909, 1, NA, NA, NA), `2_11` = c(0.149365726324845, 
1.1210733533474, 1.0427649268992, NA, NA, 0.0955468461925663), 
    `2_12` = c(0.153741630869067, 2.96276072446013, 1, NA, NA, 
    NA), `2_13` = c(0.629371115599316, 0.952868912207058, 0.0771105403237483, 
    NA, NA, 0.0885212695236819), `2_14` = c(0.907644486740723, 
    1.43000783337778, NA, NA, NA, 0.138102409899801), `2_15` = c(1.09683345304359, 
    0.423641943213571, NA, NA, NA, 0.255699738225622), `2_16` = c(0.913095779338154, 
    0.510977400533081, NA, NA, 0.520556617688936, 0.284898552722227
    ), `2_17` = c(0.935941553863477, 0.388225948821767, NA, NA, 
    1.14984991998928, 1), `2_18` = c(2.21746156904543, 0.642743615867438, 
    NA, NA, NA, 2.22716071647178), `2_19` = c(0.500618035526774, 
    0.282924681750454, NA, NA, NA, 1), `2_20` = c(0.701627311828743, 
    0.254001731153973, NA, NA, 1, 1.15996914621286), `2_21` = c(1.97359874904275, 
    NA, NA, NA, 1.67526802494991, 1.38709456754353), `2_22` = c(2.09198896289293, 
    NA, NA, NA, NA, 0.921672834103247), `2_23` = c(1.18791465369551, 
    NA, NA, NA, NA, 0.576309066193914), `2_24` = c(0.473199477125101, 
    0.176144702328764, NA, NA, 1, 0.130236848112641)), .Names = c("Name", 
"2_1", "2_2", "2_3", "2_4", "2_5", "2_6", "2_7", "2_8", "2_9", 
"2_10", "2_11", "2_12", "2_13", "2_14", "2_15", "2_16", "2_17", 
"2_18", "2_19", "2_20", "2_21", "2_22", "2_23", "2_24"), row.names = c(NA, 
6L), class = "data.frame")

Nr.3

> dput(head(tbl_gel3))
structure(list(Name = c("yal003w", "yal005c", "yal012w", "yal016w", 
"yal035w", "yal038w"), `3_1` = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), `3_2` = c(1, 1.4605309655311, 
NA, NA, NA, NA), `3_3` = c(1.74480713727388, 0.42825619952525, 
NA, NA, NA, NA), `3_4` = c(1, 0.431712121875013, NA, 0.395182020245312, 
NA, NA), `3_5` = c(2.26247329056518, 0.644462177666441, NA, 1, 
NA, NA), `3_6` = c(0.619783374266709, 0.472094874244026, NA, 
NA, NA, NA), `3_7` = c(0.45731912574756, 0.176354321796083, NA, 
NA, NA, NA), `3_8` = c(0.271829278733367, 0.517232771669986, 
0.153774052052871, NA, NA, NA), `3_9` = c(0.141017619508583, 
1.41279969394534, 0.651948154271122, NA, NA, NA), `3_10` = c(NA, 
1.64435171100405, 0.998807430240956, NA, NA, NA), `3_11` = c(0.110046035477971, 
1.33684444261939, 1.25595310581771, NA, NA, 0.0236163735479745
), `3_12` = c(NA, 0.982250906830292, 0.39283619985401, NA, NA, 
0.0688303458902568), `3_13` = c(0.136798076436642, 0.55729642483448, 
0.176525038283566, NA, NA, 0.0251189412372225), `3_14` = c(0.316623893146817, 
1, NA, NA, NA, 0.0727823461722849), `3_15` = c(NA, 0.607991038574375, 
NA, NA, NA, 0.133968257432001), `3_16` = c(0.362994392402489, 
0.547183167896534, NA, NA, NA, 0.0777347708647245), `3_17` = c(1, 
0.116561118715651, NA, NA, 0.710972173471528, 1), `3_18` = c(NA, 
3.63330458071475, NA, NA, NA, 3.24019081192985), `3_19` = c(NA, 
NA, NA, NA, NA, 2.46635222132474), `3_20` = c(0.452303676849426, 
0.0896715384025126, NA, NA, 1, 1), `3_21` = c(1.50169299468485, 
0.513442106966708, NA, NA, 1.45124841710635, 1.02529618467026
), `3_22` = c(0.565232592993276, 0.748536315065533, NA, NA, 2.9089322117881, 
0.782555457293307), `3_23` = c(1.62622280168665, 0.704926586534075, 
NA, NA, NA, 0.584486806995139), `3_24` = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_)), .Names = c("Name", 
"3_1", "3_2", "3_3", "3_4", "3_5", "3_6", "3_7", "3_8", "3_9", 
"3_10", "3_11", "3_12", "3_13", "3_14", "3_15", "3_16", "3_17", 
"3_18", "3_19", "3_20", "3_21", "3_22", "3_23", "3_24"), row.names = c(NA, 
6L), class = "data.frame")

我使用下面的函数来绑定它们。每个数据框中有不同的行数,在某些情况下,不同的名称也是如此,因此在最终表中应该比每个行中的行多。

mylist <- list(tbl_gel1,tbl_gel2,tbl_gel3)
tbl_all <- Reduce(function(x, y) merge(x, y, all=T,by="Name",sort=F), 
                  mylist, accumulate=F)

直到这一刻,一切都很顺利。 现在我想计算每个分数的平均值(总共有24个分数)

## Calculating the mean
tbl_all1 <- tbl_all[-1]
ind <- c(1, 25, 49)
tbl_mean <- cbind(tbl_all[1], sapply(0:23, function(i) rowMeans(tbl_all1[ind+i])))

该功能有问题,因为许多行的sum给出了0。这绝对是错误的,因为在tbl_gel1中,其他只是在任何分数中至少有一个数字的行。

如果我查看tbl_mean,我会看到sum 0的行位于底部。

0 个答案:

没有答案