对多列计算均值

时间:2015-03-10 20:56:15

标签: r mean

我的数据集包含遵循特定前缀和后缀模式的列,我希望系统地为这些列创建均值。例如,为 no2 创建 6 天的平均值:

df$no26 <- rowMeans(subset(df, select = c(no2_1,no2_2,no2_3,no2_4,no2_5,no2_6)))

而 bc 的 5 天平均值则用以下代码创建:

df$bc5<- rowMeans(subset(df, select = c(bc_1,bc_2,bc_3,bc_4,bc_5)))

如何在尽量少列出列名的情况下创建此类均值? 即使存在缺失值,我也希望计算平均值。在这种情况下,平均值应按全部列数(包括有值的列和缺失的列)来计算。例如,我希望用下面的代码

得到一个值为 7.287 的均值:
df$no22 <- rowMeans(subset(df, select = c(no2_1,no2_2)))

但我得到的却是缺失值(NA)。添加 na.rm = TRUE 似乎没有效果。

样本数据如下:

structure(list(no2_1 = c(16.6652581627745, 16.1861644525723, 
12.0690739274929, 14.7180965643026, 17.0917121137918, 9.13346007146945, 
14.5734824353949), no2_2 = c(16.5288772204419, 16.0538191129928, 
11.9615084628895, 14.5947277746085, 16.9509944066106, 9.04001830385535, 
NA), no2_3 = c(16.7950934018671, 16.3337465438763, 12.1697391489627, 
14.8892362872197, 14.6354992469195, 8.92549980722639, 14.595366058328
), no2_4 = c(17.180616290241, NA, 12.4976906878301, 15.2619924671276, 
12.4835154089113, 8.89727506636159, 14.9408652734481), no2_5 = c(16.8357574234312, 
17.8386592438754, 11.3983088719465, 16.3757398377023, 11.4093199571179, 
9.36056025673027, 15.9028268348344), no2_6 = c(16.0214862720291, 
18.4874694107089, 9.94904389869883, 16.9503452184647, 10.6770163895427, 
9.65783799344018, 15.7615475031484), bc_1 = c(0.161419291393667, 
0.157404540766928, 0.119038472594565, 0.139332980835602, 0.175313638583185, 
0.0859953210735663, 0.138659316642584), bc_2 = c(0.160963807995045, 
0.157468620067253, 0.119120320534171, 0.139882059272507, 0.175244058289416, 
0.0861284650489299, 0.13753649659359), bc_3 = c(0.165788721732669, 
0.162514084226369, 0.122749085709411, 0.144733227083797, 0.150886680287291, 
0.0859213694114626, 0.141372495609981), bc_4 = c(0.171603901048619, 
0.168286633890308, 0.127157156073102, 0.149955134061204, 0.127387599697608, 
0.0860720897547322, 0.146162920829911), bc_5 = c(0.168576332423096, 
0.183374076735581, 0.115963927265647, 0.164065282343513, 0.115121581746043, 
0.0911897631407383, 0.15828816660167), bc_6 = c(0.159774834761217, 
0.191737039861085, 0.100835120476982, 0.171016659707833, 0.106690061348893, 
0.0942738140677443, 0.157806261068888)), datalabel = "", time.stamp = "10 Mar 2015 21:51", .Names = c("no2_1", 
"no2_2", "no2_3", "no2_4", "no2_5", "no2_6", "bc_1", "bc_2", 
"bc_3", "bc_4", "bc_5", "bc_6"), formats = c("%9.0g", "%9.0g", 
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", 
"%9.0g", "%9.0g", "%9.0g"), types = c(255L, 255L, 255L, 255L, 
255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L), val.labels = c("", 
"", "", "", "", "", "", "", "", "", "", ""), var.labels = c("no2_1", 
"no2_2", "no2_3", "no2_4", "no2_5", "no2_6", "sot_1", "sot_2", 
"sot_3", "sot_4", "sot_5", "sot_6"), expansion.fields = list(
    c("_dta", "_lang_c", "default"), c("_dta", "_lang_list", 
    "default")), row.names = c("1", "2", "3", "4", "5", "6", 
"7"), version = 12L, class = "data.frame")

1 个答案:

答案 0 :(得分:0)

对于您的前两个问题,您可以使用 paste0 或 grepl 来减少需要键入的列名,例如

rowMeans(df[grepl("^no2", names(df))]) 

或者

rowMeans(df[paste0("no2_", 1:6)]) 

或者

rowMeans(df[paste0("bc_", 1:5)])

对于最后一个问题,请改用 rowSums,然后除以列数。这样缺失值不计入总和,而分母仍是全部列数:

temp <- df[c("no2_1","no2_2")]
rowSums(temp, na.rm = TRUE)/ncol(temp)
#         1         2         3         4         5         6         7 
# 16.597068 16.119992 12.015291 14.656412 17.021353  9.086739  7.286741