# Reproducible example data: 120 uniform random draws in 3 groups of 40 rows
set.seed(123)
d <- data.frame(x = runif(120), grp = gl(n = 3, k = 40))
# Within each grp, select the top 3 rows ranked by x
d %>%
  group_by(grp) %>%
  top_n(3, x)
如何在同一个管道中同时选取每组中最大和最小的若干个观测值？我尝试了以下方法，但不起作用：
# Helper: select both the top-n and the bottom-n rows of a data frame.
#
# Arguments:
#   x  - a data frame; it is passed unchanged to top_n(), so any grouping
#        set up by the caller is handed on as-is
#   n  - number of rows to take from each end
#   wt - bare (unquoted) column name used for the ordering
#
# Returns the top-n selection followed by the bottom-n selection, combined
# with bind_rows(). The two selections are made independently, so a row that
# qualifies for both appears twice in the result.
my_top_bott <- function(x, n, wt) {
  # `wt` arrives as a bare column name. Writing `wt = wt` does not forward
  # the caller's column expression to top_n(), which is why the previous
  # version failed. Embracing with {{ }} injects the caller's expression so
  # it is evaluated against the columns of `x`.
  top_rows <- x %>% top_n(n = n, wt = {{ wt }})
  # Negating the weight reverses the ordering, so this picks the smallest values.
  bottom_rows <- x %>% top_n(n = n, wt = -{{ wt }})
  bind_rows(top_rows, bottom_rows)
}
# Apply the helper to the grouped data; the pipe supplies the grouped data
# frame as the helper's first argument.
d %>%
  group_by(grp) %>%
  my_top_bott(n = 3, wt = x)
答案 0 :(得分:3)
一种可能是:
# Rank x from both ends within each grp and keep a row when the smaller of
# its two ranks is at most 3, i.e. it is among the 3 lowest or 3 highest.
d %>%
  group_by(grp) %>%
  filter(pmin(dense_rank(x), dense_rank(desc(x))) <= 3)
x grp
<dbl> <fct>
1 0.0456 1
2 0.957 1
3 0.0421 1
4 0.994 1
5 0.963 1
6 0.0246 1
7 0.858 2
8 0.0458 2
9 0.895 2
10 0.0948 2
11 0.815 2
12 0.000625 2
13 0.103 3
14 0.985 3
15 0.0936 3
16 0.954 3
17 0.0607 3
18 0.954 3
或者采用 @IceCreamToucan 提出的方法：
# Drop the "middle" ranks so that only the 3 lowest and 3 highest remain in
# each grp. dense_rank() numbers the distinct values 1..k, where k is the
# number of distinct non-missing x values in the group, so the upper bound is
# derived from k rather than from the row count n(): with tied values
# k < n(), and an n()-based bound would also discard some of the top ranks.
d %>%
  group_by(grp) %>%
  filter(!between(dense_rank(x), 3 + 1, n_distinct(x, na.rm = TRUE) - 3))
答案 1 :(得分:0)
您也可以使用 `row_number()`：
# Sort by x in descending order, then keep the first 3 and the last 3 rows of
# each grp by their position within the group.
# NOTE(review): arrange() is called without .by_group, so the printed result
# is ordered by x across all groups rather than group by group.
d %>%
  group_by(grp) %>%
  arrange(desc(x)) %>%
  filter(row_number() <= 3 | row_number() + 3 > max(row_number()))
x grp
<dbl> <fct>
1 0.995 2
2 0.975 2
3 0.975 1
4 0.974 3
5 0.974 3
6 0.960 1
7 0.960 3
8 0.951 2
9 0.874 1
10 0.127 2
11 0.104 2
12 0.0693 1
13 0.0520 1
14 0.0279 2
15 0.0146 3
16 0.0114 3
17 0.00864 1
18 0.00333 3