我的数据集包含遵循特定前缀和后缀模式的列,我希望系统地按行计算这些列的均值。例如,为no2创建6天平均值时,我会使用:
df$no26 <- rowMeans(subset(df, select = c(no2_1,no2_2,no2_3,no2_4,no2_5,no2_6)))
而bc的5天平均值则使用以下代码创建:
df$bc5<- rowMeans(subset(df, select = c(bc_1,bc_2,bc_3,bc_4,bc_5)))
如何用最少的列名列举来创建这类均值?另外,即使存在缺失值,我也希望计算平均值。在这种情况下,平均值应按所有列(包括缺失值所在的列)的数量来计算。例如,我希望下面的代码
在第7行得到7.287的平均值:df$no22 <- rowMeans(subset(df, select = c(no2_1,no2_2)))
但我得到的却是缺失值(NA)。添加na.rm = TRUE似乎没有效果。
样本数据如下:
structure(list(no2_1 = c(16.6652581627745, 16.1861644525723,
12.0690739274929, 14.7180965643026, 17.0917121137918, 9.13346007146945,
14.5734824353949), no2_2 = c(16.5288772204419, 16.0538191129928,
11.9615084628895, 14.5947277746085, 16.9509944066106, 9.04001830385535,
NA), no2_3 = c(16.7950934018671, 16.3337465438763, 12.1697391489627,
14.8892362872197, 14.6354992469195, 8.92549980722639, 14.595366058328
), no2_4 = c(17.180616290241, NA, 12.4976906878301, 15.2619924671276,
12.4835154089113, 8.89727506636159, 14.9408652734481), no2_5 = c(16.8357574234312,
17.8386592438754, 11.3983088719465, 16.3757398377023, 11.4093199571179,
9.36056025673027, 15.9028268348344), no2_6 = c(16.0214862720291,
18.4874694107089, 9.94904389869883, 16.9503452184647, 10.6770163895427,
9.65783799344018, 15.7615475031484), bc_1 = c(0.161419291393667,
0.157404540766928, 0.119038472594565, 0.139332980835602, 0.175313638583185,
0.0859953210735663, 0.138659316642584), bc_2 = c(0.160963807995045,
0.157468620067253, 0.119120320534171, 0.139882059272507, 0.175244058289416,
0.0861284650489299, 0.13753649659359), bc_3 = c(0.165788721732669,
0.162514084226369, 0.122749085709411, 0.144733227083797, 0.150886680287291,
0.0859213694114626, 0.141372495609981), bc_4 = c(0.171603901048619,
0.168286633890308, 0.127157156073102, 0.149955134061204, 0.127387599697608,
0.0860720897547322, 0.146162920829911), bc_5 = c(0.168576332423096,
0.183374076735581, 0.115963927265647, 0.164065282343513, 0.115121581746043,
0.0911897631407383, 0.15828816660167), bc_6 = c(0.159774834761217,
0.191737039861085, 0.100835120476982, 0.171016659707833, 0.106690061348893,
0.0942738140677443, 0.157806261068888)), datalabel = "", time.stamp = "10 Mar 2015 21:51", .Names = c("no2_1",
"no2_2", "no2_3", "no2_4", "no2_5", "no2_6", "bc_1", "bc_2",
"bc_3", "bc_4", "bc_5", "bc_6"), formats = c("%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g"), types = c(255L, 255L, 255L, 255L,
255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L), val.labels = c("",
"", "", "", "", "", "", "", "", "", "", ""), var.labels = c("no2_1",
"no2_2", "no2_3", "no2_4", "no2_5", "no2_6", "sot_1", "sot_2",
"sot_3", "sot_4", "sot_5", "sot_6"), expansion.fields = list(
c("_dta", "_lang_c", "default"), c("_dta", "_lang_list",
"default")), row.names = c("1", "2", "3", "4", "5", "6",
"7"), version = 12L, class = "data.frame")
答案 0 :(得分:0)
对于您的前两个问题,您可以使用paste0
或grepl
来减少键入量,例如
# Select only the daily no2 columns (no2_1, no2_2, ...). The pattern is
# anchored at both ends so that derived columns whose names also start with
# "no2" (e.g. no26 or no22) are not averaged in by mistake.
rowMeans(df[grepl("^no2_[0-9]+$", names(df))])
或者
rowMeans(df[paste0("no2_", 1:6)])
或者
rowMeans(df[paste0("bc_", 1:5)])
对于最后一个问题,请改用rowSums(设置na.rm = TRUE),
然后除以列数,这样缺失值所在的列仍计入分母
# Keep the two columns of interest as a data frame.
temp <- df[, c("no2_1", "no2_2"), drop = FALSE]
# Sum each row ignoring NA, then divide by the total number of columns, so a
# missing value contributes 0 to the sum but still counts in the denominator.
rowSums(temp, na.rm = TRUE) / length(temp)
# 1 2 3 4 5 6 7
# 16.597068 16.119992 12.015291 14.656412 17.021353 9.086739 7.286741