从平均值计算中排除其中一列

时间:2014-05-15 15:08:04

标签: r

我有一个像这样的data.frame:

> dput(head(dat))
structure(list(`Gene name` = c("at1g01050", "at1g01080", "at1g01090", 
"at1g01220", "at1g01320", "at1g01420"), `1_1` = c(0, 0, 0, 0, 
0, 0), `1_2` = c(0, 0, 0, 0, 0, 0), `1_3` = c(0, 2.2266502274762, 
0, 0, 0, 0), `1_4` = c(0, 1.42835007256373, 0, 0, 0, 0), `1_5` = c(0, 
1, 0, 0, 0, 0.680307288653971), `1_6` = c(0, 0.974694551708235, 
0.0703315834738149, 0, 0, 1.5411058346636), `1_7` = c(1, 1.06166030205396, 
0, 0, 0, 0), `1_8` = c(1, 1.07309874414745, 0.129442847788922, 
0, 0, 0), `1_9` = c(1.83566164452602, 0.770848509662441, 1.16522133036595, 
1.02360016370994, 0, 0), `1_10` = c(0, 0, 0.96367393959757, 0, 
0, 0), `1_11` = c(0, 1, 1.459452636222, 0, 0.992067202742928, 
0), `1_12` = c(0, 0, 0.670100384155585, 0, 0.461601636474094, 
0), `1_13` = c(0, 0, 1.43074917909221, 0, 1.35246977730244, 0
), `1_14` = c(0, 0, 1.13052717277684, 0, 1.27971261718285, 0), 
    `1_15` = c(0, 0, 0, 0, 0, 0), `1_16` = c(0, 0, 1.02186950513655, 
    0, 0.937805171752374, 0), `1_17` = c(0, 0, 0, 0, 1.82226410514639, 
    0), `1_18` = c(0, 0, 1.2057581396188, 0, 1, 0), `1_19` = c(0, 
    0, 2.54080080087007, 0, 1.74014162763125, 0), `1_20` = c(0, 
    0, 0, 0, 0, 0), `1_21` = c(0, 0, 1.85335086627868, 0, 2.93605031878879, 
    0), `1_22` = c(0, 0, 0, 0, 0, 0), `1_23` = c(0, 0, 0, 0, 
    0, 0), `1_24` = c(0, 0.59685787388353, 4.74450895485671, 
    0, 1.64665192735547, 0), `1_25` = c(0, 0, 0, 0, 0, 0), `1_26` = c(0, 
    0, 0, 0, 0, 0), `1_27` = c(0, 1.70324142554566, 0, 0, 0, 
    0), `1_28` = c(0, 4.02915818089525, 0, 0, 0, 0), `1_29` = c(0, 
    1.10050253348262, 0, 0, 0, 1.78705663080963), `1_30` = c(0, 
    0, 0, 0, 0, 0), `1_31` = c(0.525193634811661, 1.19203674964562, 
    0, 0, 0, 0), `1_32` = c(0.949695564218912, 0.511935958918944, 
    0.698256748091399, 0.924419021307232, 0, 0), `1_33` = c(1, 
    0.392202418854686, 0.981531026331928, 1, 0, 0), `1_34` = c(0, 
    0, 1.04480642952605, 0, 0, 0), `1_35` = c(0.875709646300199, 
    0.416787083481068, 0.910412293707794, 0, 0.931813162802324, 
    0), `1_36` = c(0.235817844851986, 0, 0.695496044366791, 0, 
    0, 0), `1_37` = c(0, 0, 0, 0, 0, 0), `1_38` = c(0, 0, 0, 
    0, 0, 0), `1_39` = c(0, 0, 0, 0, 0, 0), `1_40` = c(0, 0.426301584359177, 
    1.05916031917965, 0, 1.11716924423855, 0), `1_41` = c(0, 
    0, 0, 0, 0, 0), `1_42` = c(0, 0, 0, 0, 0, 0), `1_43` = c(0, 
    0, 0, 0, 0, 0), `1_44` = c(0, 0.817605484758179, 1, 0, 1, 
    0), `1_45` = c(0, 0, 0, 0, 1.83706702696725, 0), `1_46` = c(0, 
    0, 0, 0, 0, 0), `1_48` = c(0, 0, 0, 0, 0, 0), `1_49` = c(0, 
    0, 0, 0, 0, 0), `1_50` = c(0, 0, 0, 0, 0, 0), `1_51` = c(0, 
    0.822966241998042, 0, 0, 0, 0), `1_52` = c(0, 1.38548267401525, 
    0, 0, 0, 0), `1_53` = c(0, 0.693090058304095, 0, 0, 0, 1.200664746484
    ), `1_54` = c(0, 7.58136662752864, 0, 0, 0, 0), `1_55` = c(0.519878111919004, 
    0.530809413647805, 0.343274113384907, 0, 0, 0), `1_56` = c(1.24511715957891, 
    0.545097856366912, 0.397440073804376, 0, 0, 0), `1_57` = c(1.26748496499576, 
    0.502893153188496, 1, 1.09278985531586, 0, 0), `1_58` = c(0.696198684496234, 
    0.68197003689249, 1.30108437738319, 0.778091049180591, 0.533017938104689, 
    0), `1_59` = c(1.15255606344999, 0.294294436704185, 1.07862692616479, 
    1, 0.250091116406616, 0), `1_60` = c(1.95634163405497, 0, 
    1.1602014253913, 0, 0, 0), `1_61` = c(1.09287167009628, 0, 
    2.05939536537347, 1.08165521287259, 0.68027384701565, 0), 
    `1_62` = c(0.791776166968497, 0, 0.846107162142824, 0, 0.77013323652256, 
    0), `1_63` = c(0.378787010943447, 0.391876271945063, 0.623223753921758, 
    0, 0.651918444771296, 0), `1_64` = c(0.189585762007804, 0.361452381684218, 
    0.799519726870751, 0, 1.06818683719768, 0), `1_65` = c(0, 
    0, 2.5212953775211, 0, 0, 0), `1_66` = c(0, 0, 0, 0, 0, 0
    ), `1_67` = c(0, 0, 0, 0, 2.44827717262786, 0), `1_68` = c(0, 
    0, 0, 0, 0, 0), `1_69` = c(0, 0, 0, 0, 0, 0), `1_70` = c(0, 
    0, 2.36142611074334, 0, 2.391093649557, 0), `1_71` = c(0, 
    0, 0.35565044656798, 0, 0, 0), `1_72` = c(0, 0, 5.86951313801941, 
    0, 0, 0)), .Names = c("Gene name", "1_1", "1_2", "1_3", "1_4", 
"1_5", "1_6", "1_7", "1_8", "1_9", "1_10", "1_11", "1_12", "1_13", 
"1_14", "1_15", "1_16", "1_17", "1_18", "1_19", "1_20", "1_21", 
"1_22", "1_23", "1_24", "1_25", "1_26", "1_27", "1_28", "1_29", 
"1_30", "1_31", "1_32", "1_33", "1_34", "1_35", "1_36", "1_37", 
"1_38", "1_39", "1_40", "1_41", "1_42", "1_43", "1_44", "1_45", 
"1_46", "1_48", "1_49", "1_50", "1_51", "1_52", "1_53", "1_54", 
"1_55", "1_56", "1_57", "1_58", "1_59", "1_60", "1_61", "1_62", 
"1_63", "1_64", "1_65", "1_66", "1_67", "1_68", "1_69", "1_70", 
"1_71", "1_72"), row.names = c(NA, 6L), class = "data.frame")

这是我用来计算数据框中3次重复的均值的代码:

## Calculating the mean of 3 "replicates"
# Starting column positions (within dat2) used for the 3 replicates; this
# layout assumes each replicate occupies 24 consecutive columns.
ind <- c(1, 25, 49)
# Drop the first column of dat so only the remaining columns are indexed below.
dat2 <- dat[-1]
# For every offset i in 0..23, take the row-wise mean of the three columns at
# positions ind + i, then bind the 24 mean columns next to dat's first column.
# NOTE(review): the largest position requested is 49 + 23 = 72, so dat2 must
# have at least 72 columns; otherwise `[.data.frame` stops with
# "undefined columns selected". A missing column also shifts every later
# column one position to the left, so the positions no longer line up.
tbl_end <- cbind(dat[1], sapply(0:23, function(i) rowMeans(dat2[ind+i])))

运行这段代码时出现如下错误:

Error in `[.data.frame`(dat2, ind + i) : undefined columns selected
Called from: eval(substitute(browser(skipCalls = pos), list(pos = 9 - frame)), 
    envir = sys.frame(frame))

我的结果只有71列(本应是72列:24个分数 × 3次重复 = 72),也就是说缺少了一列。我不知道它为什么会丢失,但无论如何我必须解决这个问题。缺少的是 1_47,它本应与 1_23 和 1_71 归为同一组来求平均值。您是否知道如何修改我的代码,使其忽略缺失的分数 1_47,同时仍然计算 1_23 和 1_71 的平均值?

1 个答案:

答案 0 :(得分:1)

为什么不为 1_47 添加一个全为 NA 的占位列呢?这样您的数据会更加规整,也更容易按索引提取所需的列。为此,请尝试:

# Re-insert an all-NA placeholder for the missing fraction `1_47` so that every
# fraction 1_k sits at column k + 1 of dat2 (column 1 is the gene name).
# dat[1:47] is the gene name plus 1_1..1_46; dat[48:72] is 1_48..1_72.
# `1_47` is not a syntactic R name, so it has to be backtick-quoted; NA_real_
# keeps the placeholder column numeric like its neighbours.
dat2 <- cbind(dat[1:47], `1_47` = rep(NA_real_, nrow(dat)), dat[48:72])
# Fraction numbers that start each of the 3 replicates (1_1, 1_25, 1_49).
ind <- c(1, 25, 49)
# The extra + 1 skips the gene-name column that dat2 still carries.
# na.rm = TRUE makes the placeholder drop out, so the group containing 1_47 is
# averaged over 1_23 and 1_71 only. vapply() pins the result to one numeric
# vector of length nrow(dat) per group, giving an nrow(dat) x 24 matrix.
tbl_end <- cbind(
  dat[1],
  vapply(
    0:23,
    function(i) rowMeans(dat2[ind + i + 1], na.rm = TRUE),
    numeric(nrow(dat))
  )
)