我有一个 data.frame,我希望返回 `min` 和 `max` `time` 对应的观察值。
# Example data: nine observations of time and Mz, three per set_nbr group.
df <- data.frame(
  time = c(
    24594.55, 29495.45, 24594.55,
    39297.27, 24594.55, 34396.36,
    19693.64, 14792.73, 29495.45
  ),
  Mz = c(
    -0.04729751, -0.50902297, -0.04376393,
    -0.22218980, -0.36407263, -0.38341534,
    -0.34597255, -0.01480776, -0.00999671
  ),
  # Group identifier, kept as double: 1, 1, 1, 2, 2, 2, 3, 3, 3.
  set_nbr = rep(c(1, 2, 3), each = 3)
)
library(dplyr)
# For each set_nbr group, find the observation(s) with the smallest time
# among the rows where Mz is negative.
min_time <- df %>%
group_by(set_nbr) %>%
# Keep only the rows of each group whose Mz is below zero.
slice(which(Mz<0))%>%
# rank() orders ascending, and ties.method="min" gives tied values the lowest
# rank of the tie, so every row sharing the smallest time has rank 1.
filter(rank(time,ties.method="min")==1)%>%
# Drop duplicate rows.
# NOTE(review): how distinct with no arguments treats a grouped data frame
# may depend on the dplyr version -- confirm against the version in use.
distinct
# Print the result.
min_time
##Source: local data frame [3 x 3]
##Groups: set_nbr
##       time          Mz set_nbr
## 1 24594.55 -0.04729751 1
## 2 24594.55 -0.36407263 2
## 3 14792.73 -0.01480776 3
这样可行,但是当我尝试获得max_time时,会出现奇怪的结果:
# Attempt to find, for each set_nbr group, the observation with the largest
# time among the rows where Mz is negative.
max_time <- df %>%
group_by(set_nbr) %>%
# Keep only the rows of each group whose Mz is below zero.
slice(which(Mz<0))%>%
# NOTE(review): rank() still orders ascending here, so rank 1 is the smallest
# time, not the largest. ties.method="max" gives tied values the highest rank
# of the tie, so when the smallest time is tied no row has rank 1 and the
# whole group is dropped from the result.
filter(rank(time,ties.method="max")==1)%>%
# Drop duplicate rows.
distinct
# Print the result.
max_time
##Source: local data frame [2 x 3]
##Groups: set_nbr
##       time          Mz set_nbr
##1 24594.55 -0.36407263 2
##2 14792.73 -0.01480776 3
`set_nbr` 为 1 的组的 `max` `time` 值不正确。我不知道为什么。
预期输出
max_time
##       time          Mz set_nbr
##1 29495.45 -0.50902297 1
##2 39297.27 -0.22218980 2
##3 29495.45 -0.00999671 3
答案 0 :(得分:3)
尝试
# Within each set_nbr group, keep every row whose time equals the group's
# largest time (all tied rows are kept).
df %>%
  group_by(set_nbr) %>%
  filter(time == max(time))
# time Mz set_nbr
#1 29495.45 -0.50902297 1
#2 39297.27 -0.22218980 2
#3 29495.45 -0.00999671 3
或者
# Within each set_nbr group, keep the row at the position of the largest
# time; which.max() returns only the first such position, so one row per group.
df %>%
  group_by(set_nbr) %>%
  slice(which.max(time))
# time Mz set_nbr
#1 29495.45 -0.50902297 1
#2 39297.27 -0.22218980 2
#3 29495.45 -0.00999671 3
关于你的代码无效的原因
# Diagnostic: show the per-group rank of time (ties.method = "max") next to
# each row with negative Mz, to see which rank values actually occur.
df %>%
  group_by(set_nbr) %>%
  slice(which(Mz < 0)) %>%
  mutate(rn = rank(time, ties.method = "max"))
# time Mz set_nbr rn
#1 24594.55 -0.04729751 1 2
#2 29495.45 -0.50902297 1 3
#3 24594.55 -0.04376393 1 2
#4 39297.27 -0.22218980 2 3
#5 24594.55 -0.36407263 2 1
#6 34396.36 -0.38341534 2 2
#7 19693.64 -0.34597255 3 2
#8 14792.73 -0.01480776 3 1
#9 29495.45 -0.00999671 3 3
如果查看输出,对于 'set_nbr' 为 1 的组,'rn' 中没有 1,因为存在并列值(ties)。你可以这样做:
# Within each set_nbr group, keep the single row with the largest time among
# the rows where Mz is negative.
df %>%
  group_by(set_nbr) %>%
  # Keep only the rows of each group whose Mz is below zero.
  slice(which(Mz < 0)) %>%
  # Ranking -time makes the largest time rank 1, and ties.method = "first"
  # gives tied values distinct ranks, so exactly one row per group has rank 1.
  # The condition is passed unnamed: filter() expects logical expressions, and
  # a named argument such as `rn = ...` is rejected by current dplyr.
  filter(rank(-time, ties.method = "first") == 1)
# time Mz set_nbr
#1 29495.45 -0.50902297 1
#2 39297.27 -0.22218980 2
#3 29495.45 -0.00999671 3