我有一个数据,我想首先过滤一些行并将剩余的行相加。
过滤条件如下:
对于 gr==1,找到 y_value==10 的最后一次出现,并保留其前面的所有行(包括最后一次出现值 10 的这一行);
对于 gr==2,找到 y_value==10 的第一次出现,并保留其后的所有行(包括第一次出现值 10 的这一行)。
数据如下:
# Example data: 8 rows for group 1 followed by 7 rows for group 2.
df <- data.frame(
  gr = rep(c(1, 2), times = c(8, 7)),
  y_value = c(
    2, 10, 10, 8, 10, 6, 0, 0, # group 1
    0, 0, 10, 10, 6, 8, 10     # group 2
  )
)
gr y_value
1 1 2
2 1 10
3 1 10
4 1 8
5 1 10
6 1 6
7 1 0
8 1 0
9 2 0
10 2 0
11 2 10
12 2 10
13 2 6
14 2 8
15 2 10
我参考了 summing-rows-based-on-conditional-in-groups 这个问题,做了如下尝试:
# Asker's attempt, based on the referenced question
# "summing-rows-based-on-conditional-in-groups".
# NOTE(review): per the question text this does not return the wanted rows.
# The filtering logic is left as posted; only the assignment target
# `df_temp` has been separated from the prose that was fused onto it.
df_temp <- df %>%
  group_by(gr) %>%
  # Running count of y_value == 10 within each group.
  mutate(rows_to_aggregate = cumsum(y_value == 10)) %>%
  # gr 1: keep rows from the first 10 onward.
  # gr 2: keep rows before the first 10, plus every row equal to 10.
  # Any other group: keep only rows before the first 10.
  filter(ifelse(gr == 1, rows_to_aggregate != 0,
    ifelse(gr == 2, rows_to_aggregate == 0 | y_value == 10,
      rows_to_aggregate == 0
    )
  )) %>%
  # gr 1: drop the first remaining row; gr 2: drop the last remaining row.
  filter(ifelse(gr == 1, row_number(gr) != 1,
    ifelse(gr == 2, row_number(gr) != n(), rows_to_aggregate == 0)
  ))
但问题是,如果我在 gr==1 中使用 rows_to_aggregate != 0,我感兴趣的行就会消失!
任何指点都将不胜感激!
答案 0 :(得分:2)
不知道如何在dplyr中执行此操作,但此代码似乎可以正常工作
# Base-R version: trim each group separately, then stack the pieces.

# Group 1: keep every row up to AND including the last y_value == 10.
gr1 <- df[df$gr == 1, ]
last <- tail(which(gr1$y_value == 10), 1)
# max(last, 0) turns "no 10 found" (empty `last`) into zero rows kept
# instead of an indexing error.
gr1 <- gr1[seq_len(max(last, 0)), ]

# Group 2: keep every row from the first y_value == 10 onward, inclusive.
gr2 <- df[df$gr == 2, ]
first <- head(which(gr2$y_value == 10), 1)
# min(first, Inf) makes the comparison all-FALSE when no 10 is found.
gr2 <- gr2[seq_len(nrow(gr2)) >= min(first, Inf), ]

final <- rbind(gr1, gr2)
答案 1 :(得分:2)
# Count the 10s seen so far within each group, then keep only the rows
# inside each group's window of interest.
df_to_aggregate <- df %>%
  group_by(gr) %>%
  mutate(rows_to_aggregate = cumsum(y_value == 10)) %>%
  filter(
    # gr 1: a row survives unless it lies after the last 10, i.e. unless
    # its running count equals the group maximum and it is not a 10 itself.
    gr != 1 | rows_to_aggregate != max(rows_to_aggregate) | y_value == 10,
    # gr 2: a row survives once at least one 10 has been seen.
    gr != 2 | rows_to_aggregate != 0
  ) %>%
  select(-rows_to_aggregate)

# Print the result (still grouped by gr).
df_to_aggregate
# A tibble: 10 x 2
# Groups: gr [2]
gr y_value
<dbl> <dbl>
1 1 2
2 1 10
3 1 10
4 1 8
5 1 10
6 2 10
7 2 10
8 2 6
9 2 8
10 2 10
答案 2 :(得分:1)
您可以使用 slice,并对每个 gr 使用不同的切片条件。
# Pick row positions per group: group 1 runs from its first row to its
# last 10; every other group runs from its first 10 to its final row.
df %>%
  group_by(gr) %>%
  slice(
    if (any(gr == 1)) {
      seq_len(max(which(y_value == 10)))
    } else {
      seq(min(which(y_value == 10)), n())
    }
  )
   gr y_value
1   1       2
2   1      10
3   1      10
4   1       8
5   1      10
6   2      10
7   2      10
8   2       6
9   2       8
10  2      10