dplyr > 获取某变量取最小值和最大值的行

时间:2015-06-15 02:48:40

标签: r dplyr

我有一个 data.frame,希望返回时间(time)最小和最大的观测值所在的行。

# Sample data: nine observations, three per set (set_nbr 1, 2 and 3).
df <- data.frame(
  time = c(
    24594.55, 29495.45, 24594.55,
    39297.27, 24594.55, 34396.36,
    19693.64, 14792.73, 29495.45
  ),
  Mz = c(
    -0.04729751, -0.50902297, -0.04376393,
    -0.22218980, -0.36407263, -0.38341534,
    -0.34597255, -0.01480776, -0.00999671
  ),
  set_nbr = rep(c(1, 2, 3), each = 3)
)


library(dplyr)

# For each set_nbr group, keep the rows with negative Mz and then the row(s)
# holding the smallest time.
min_time <- df %>% 
  group_by(set_nbr) %>%
  # Keep only rows whose Mz is negative.
  slice(which(Mz<0))%>%
  # rank() is ascending, so rank 1 is the smallest time; with
  # ties.method="min" every row tied for the smallest time gets rank 1.
  filter(rank(time,ties.method="min")==1)%>%
  # NOTE(review): set 1 has two rows tied at the smallest time with different
  # Mz, yet the printed result shows one row per group; the behaviour of a
  # bare distinct on a grouped frame presumably depends on the dplyr
  # version -- confirm.
  distinct


min_time 

 ##Source: local data frame [3 x 3]
 ##Groups: set_nbr

      time          Mz set_nbr
## 1 24594.55 -0.04729751       1
## 2 24594.55 -0.36407263       2
## 3 14792.73 -0.01480776       3

这样可行,但是当我尝试获得max_time时,会出现奇怪的结果:

# Attempt to get the row with the largest time per set_nbr group by switching
# ties.method to "max".
max_time <- df %>% 
  group_by(set_nbr) %>%
  # Keep only rows whose Mz is negative.
  slice(which(Mz<0))%>%
  # NOTE: rank() is ascending, so rank 1 still targets the smallest time, not
  # the largest. ties.method="max" gives tied values the highest rank of the
  # tie, so a group whose smallest time is tied has no rank-1 row at all and
  # drops out of the result.
  filter(rank(time,ties.method="max")==1)%>%
  # NOTE(review): behaviour of a bare distinct on a grouped frame presumably
  # depends on the dplyr version -- confirm.
  distinct

max_time 

##Source: local data frame [2 x 3]
##Groups: set_nbr

        time          Mz set_nbr
##1 24594.55 -0.36407263       2
##2 14792.73 -0.01480776       3

结果中缺少 set_nbr 为 1 的组,而且 max time 的值也不正确。我不知道为什么。

预期输出

max_time 

      time            Mz    set_nbr
##1 29495.45 -0.50902297       1
##2 39297.27 -0.22218980       2
##3 29495.45 -0.00999671       3

1 个答案:

答案 0 :(得分:3)

尝试

# Within each set_nbr group, keep the row(s) whose time equals the group maximum.
filter(
  group_by(df, set_nbr),
  time == max(time)
)
#     time          Mz set_nbr
#1 29495.45 -0.50902297       1
#2 39297.27 -0.22218980       2
#3 29495.45 -0.00999671       3

或者

 # Within each set_nbr group, keep the single row at the position of the
 # largest time (which.max() returns the first such position).
 slice(
   group_by(df, set_nbr),
   which.max(time)
 )
 #      time          Mz set_nbr
 #1 29495.45 -0.50902297       1
 #2 39297.27 -0.22218980       2
 #3 29495.45 -0.00999671       3

至于你的代码为什么不起作用:

 # Diagnostic: attach each row's within-group rank of time (ties resolved
 # with "max") as column rn, so the ranks can be inspected directly.
 df %>%
   group_by(set_nbr) %>%
   slice(which(Mz < 0)) %>%
   mutate(
     rn = rank(time, ties.method = "max")
   )
 #      time          Mz set_nbr rn
 #1 24594.55 -0.04729751       1  2
 #2 29495.45 -0.50902297       1  3
 #3 24594.55 -0.04376393       1  2
 #4 39297.27 -0.22218980       2  3
 #5 24594.55 -0.36407263       2  1
 #6 34396.36 -0.38341534       2  2
 #7 19693.64 -0.34597255       3  2
 #8 14792.73 -0.01480776       3  1
 #9 29495.45 -0.00999671       3  3

查看输出可以发现,'set_nbr' 为 1 的组中没有 'rn' 等于 1 的行,因为存在并列值(ties)。你可以这样做:

 # Per set_nbr group, keep the negative-Mz row with the largest time.
 # Ranking -time makes rank 1 the largest time, and ties.method = "first"
 # breaks ties by position, so a single row per group gets rank 1.
 # The condition must be passed unnamed: filter() rejects named arguments
 # such as `rn = ...` (it treats them as a mistyped `==`).
 df %>%
   group_by(set_nbr) %>%
   slice(which(Mz < 0)) %>%
   filter(rank(-time, ties.method = "first") == 1)
 #      time          Mz set_nbr
 #1 29495.45 -0.50902297       1
 #2 39297.27 -0.22218980       2
 #3 29495.45 -0.00999671       3