# Example data in long format: one row per (id, testoccasion) pair, with a
# measurement `ht` and a `time` value. Ids 1 and 2 have three test occasions,
# id 3 has only two.
hc1 <- data.frame(
  id           = rep(c(1, 2, 3), times = c(3, 3, 2)),
  testoccasion = c(1, 2, 3, 1, 2, 3, 1, 2),
  ht           = c(0.2, 0.1, 0.8, 0.9, 1.0, 0.5, 0.4, 0.8),
  time         = c(5, 4, 8, 5, 6, 5, 2, 1)
)
这是我的代码。
library(dplyr)
# Each of the three tables below adds two columns to `hc1`, computed within
# each id after ordering by test occasion:
#   fd - percent change in `ht` relative to an earlier occasion
#   t  - difference in `time` relative to that same earlier occasion
# Rows that have no occasion that far back get NA in both columns.
# NOTE(review): `c` and `diff` share their names with base R functions.

# Lag 1: compare with the immediately preceding occasion.
a <- hc1 %>%
  group_by(id) %>%
  arrange(id, testoccasion) %>%
  mutate(
    fd = (ht - lag(ht)) / lag(ht) * 100,
    t = time - lag(time)
  )

# Lag 2: compare with the occasion two steps back.
b <- hc1 %>%
  group_by(id) %>%
  arrange(id, testoccasion) %>%
  mutate(
    fd = (ht - lag(ht, n = 2)) / lag(ht, n = 2) * 100,
    t = time - lag(time, n = 2)
  )

# Lag 3: compare with the occasion three steps back.
c <- hc1 %>%
  group_by(id) %>%
  arrange(id, testoccasion) %>%
  mutate(
    fd = (ht - lag(ht, n = 3)) / lag(ht, n = 3) * 100,
    t = time - lag(time, n = 3)
  )

# Stack the three lag tables, then drop every row that contains an NA.
diff <- rbind(a, b, c)
diff <- na.omit(diff)
我很好奇如何使这段代码更短。我希望能够针对所有测试场合找到所有可能的ht对之间的差异,其中每个id的测试场合数量不同。如果我不必像这样重复进行操作,那将是非常不错的,因为这是一个巨大的数据集。谢谢!
答案 0 :(得分:3)
我们可以使用 `map` 对 `lag` 中使用的 `n`(滞后阶数)进行循环:
library(tidyverse)
# Sort and group once, up front: every lag below is taken over the same
# within-id sequence of test occasions, so there is no need to redo this
# work on each iteration.
hc1_sorted <- hc1 %>%
  arrange(id, testoccasion) %>%
  group_by(id)

# An id with k test occasions only has pairs at lags 1..(k - 1), so the
# largest group bounds the lags worth computing (instead of a hard-coded 1:3).
# The floor of 2 means lag 1 is always computed, which keeps the `fd` and `t`
# columns in the result even when no id has two occasions (or `hc1` is empty).
max_lag <- max(group_size(hc1_sorted), 2L) - 1L

# For each lag k: `fd` is the percent change in `ht` and `t` the difference in
# `time` relative to the row k occasions earlier within the same id. The
# per-lag tables are row-bound in lag order; rows containing any NA (including
# those with no partner k occasions back) are then dropped.
map_df(seq_len(max_lag), function(k) {
  hc1_sorted %>%
    mutate(
      fd = (ht - lag(ht, k)) / lag(ht, k) * 100,
      t = time - lag(time, k)
    )
}) %>%
  na.omit()
# A tibble: 7 x 6
# Groups: id [3]
# id testoccasion ht time fd t
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 2 0.1 4 -50 -1
#2 1 3 0.8 8 700 4
#3 2 2 1 6 11.1 1
#4 2 3 0.5 5 -50 -1
#5 3 2 0.8 1 100 -1
#6 1 3 0.8 8 300. 3
#7 2 3 0.5 5 -44.4 0