我正在尝试对数据框中各个分组内的某一列应用超前/滞后(lead/lag)。我有一个单独的数据框,提供超前值。超前值必须按主题、节点和传感器进行匹配。
示例数据:
# Main data: 40 rows. subj and node are factors; sensor and env_vals are numeric.
# Only four key combinations occur: c1 / Node 1 with sensors 2600 and 2610,
# and c2 / Node 2 with sensors 2620 and 2630 (10 rows each).
dput(test_df)
structure(list(subj = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), class = "factor", .Label = c("c1", "c2")), node = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Node 1", "Node 2"), class = "factor"),
sensor = c(2600, 2600, 2600, 2600, 2600, 2610, 2610, 2610,
2610, 2610, 2620, 2620, 2620, 2620, 2620, 2630, 2630, 2630,
2630, 2630, 2600, 2600, 2600, 2600, 2600, 2610, 2610, 2610,
2610, 2610, 2620, 2620, 2620, 2620, 2620, 2630, 2630, 2630,
2630, 2630), env_vals = c(5.33510151261835, 5.37708998203619,
5.18984848232565, 6.82992070825272, 5.92982096601743, 7.05707692156306,
7.67415658214675, 7.34534719192697, 6.5280531083936, 4.42063211468128,
4.98606873099945, 6.71683566611408, 7.04201828330796, 3.22384043747125,
7.16178630140025, 3.97134044753568, 3.06904118833596, 6.10839825474766,
2.51080443592448, 1.62815576579611, 4.5366549039861, 4.05204500710188,
8.50974398925943, 0.454711437225098, 7.63457277730028, 7.73074760170432,
1.7535421576035, 1.255666521349, 2.67319773682482, 1.61263970508914,
6.84515776718986, 4.319997054675, 5.64959416239443, 1.52348658940225,
4.05659367113441, 5.19205390068456, 2.41995034428535, 4.81929265375379,
4.65957617474215, 3.85295676615691)), .Names = c("subj",
"node", "sensor", "env_vals"), row.names = c(NA, -40L), class = "data.frame")
# Lookup table of lead/lag amounts: 16 rows keyed by subj, node and sensor.
# Every (subj, node, sensor) key appears twice with a different lg_val
# (1 and 3 for c1 / Node 1, 2 and 4 for c2 / Node 2).
dput(cc_df)
structure(list(subj = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("c1", "c2"), class = "factor"),
node = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Node 1", "Node 2"), class = "factor"),
sensor = c(2600, 2610, 2620, 2630, 2600, 2610, 2620, 2630,
2600, 2610, 2620, 2630, 2600, 2610, 2620, 2630), lg_val = c(1,
1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4)), .Names = c("subj",
"node", "sensor", "lg_val"), row.names = c(NA, -16L), class = "data.frame")
循环cc_df中的主题,节点,传感器组合,然后使用这些组合来应用相关的超前/滞后值。
# Apply each cc_df row's lead amount to the matching rows of test_df.
#
# Original attempt, kept for reference. It fails with "cannot modify grouping
# variable": grouping by comparisons such as `subj==sbj` stores the computed
# TRUE/FALSE columns as grouping variables, and the next pass through the loop
# tries to recompute those same columns on the already-grouped data. Even
# without the error it would shift the non-matching rows as well.
# test_df <- group_by(test_df, subj==sbj, node==nd, sensor==sens) %>% mutate(lagged_env_sensor=lead(env_vals,lg_val))
#
# Fix: start with an all-NA result column and, for every cc_df row, overwrite
# only the rows whose subj, node and sensor all match. When cc_df lists the
# same key more than once, the last listed lg_val wins.
test_df$lagged_env_sensor <- NA_real_
for (i in seq_len(nrow(cc_df))) {
  sbj <- as.character(cc_df$subj[i])
  nd <- as.character(cc_df$node[i])
  sens <- cc_df$sensor[i]
  lg_val <- as.numeric(cc_df$lg_val[i])
  # print(str(data.frame(sbj,nd,sens,lg_val)))
  # t2 <- filter(test_df, subj==sbj, node==nd, sensor==sens) %>% transmute_(lagged_env_sensor=lead(env_vals,lg_val))
  # which() drops NA comparisons so they cannot end up as NA subscripts.
  hits <- which(test_df$subj == sbj & test_df$node == nd & test_df$sensor == sens)
  if (length(hits) == 0) {
    next
  }
  # lead() receives a single shift value and only this group's values.
  test_df$lagged_env_sensor[hits] <- lead(test_df$env_vals[hits], lg_val)
}
上面 group_by(...) %>% mutate(...) 的写法会报错:Error in eval(expr, envir, enclos) : cannot modify grouping variable
如果我取消注释上面的 t2 部分,就能得到超前/滞后的结果(以及一些与列匹配有关的无关列)。
因此,我的问题是如何将正确的超前/滞后值应用于test_df的正确分组?
感谢。
伊恩
答案 0 :(得分:0)
听起来您想执行join操作:
library(dplyr)
# The left join keeps every test_df row and pulls in lg_val from cc_df
# wherever subj, node and sensor all match.
new_df <- test_df %>%
  left_join(cc_df, by = c("subj", "node", "sensor"))
现在,env_vals 和 lg_val 都在同一个表中,并按主题、节点和传感器匹配:
# The join returns 80 rows rather than test_df's 40: each (subj, node, sensor)
# key appears twice in cc_df, so every test_df row is matched twice. The
# alternating 1 3 1 3 ... in lg_val below shows the two matches per row.
str(new_df)
# 'data.frame': 80 obs. of 5 variables:
# $ subj : Factor w/ 2 levels "c1","c2": 1 1 1 1 1 1 1 1 1 1 ...
# $ node : Factor w/ 2 levels "Node 1","Node 2": 1 1 1 1 1 1 1 1 1 1 ...
# $ sensor : num 2600 2600 2600 2600 2600 2600 2600 2600 2600 2600 ...
# $ env_vals: num 5.34 5.34 5.38 5.38 5.19 ...
# $ lg_val : num 1 3 1 3 1 3 1 3 1 3 ...
您现在可以创建一个新列(不过直接把整列 lg_val 传给 lead() 会产生错误,因为 n 必须是单个数值):
# lead() needs a single number for `n`, so passing the whole lg_val column
# errors:
# mutate(new_df, lagged_env_sensor=lead(env_vals, lg_val))
#
# Group by the join keys plus lg_val so that each group has exactly one shift
# value, then lead within the group.
# NOTE(review): assumes every row found a match in cc_df (no NA in lg_val).
new_df %>%
  group_by(subj, node, sensor, lg_val) %>%
  mutate(lagged_env_sensor = lead(env_vals, n = lg_val[1])) %>%
  ungroup()