我有一些数据,其中一个变量是具有某些要求的会计师。我现在需要知道的是,每个ID计数器达到1的次数,如果连续有多个1,则只需计数1。
例如,假设ID的计数器为:1、0、0、1、1、0、0、1、1、1、0、0。我不得不说id的频率为3。
Frec_counter计算1.出现的非连续次数。如果有连续的1,则最后一个编号。
我的数据:
id <- c(10,10,10,10,10,11,11,11,11,11,11,12,12,12,13, 13, 15, 14)
counter <- c(0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1)
DF <- data.frame(id, counter); DF
Id 10具有0,0,1,1,0。
5个数据,但只有1个非连续数据,因此将其设置为frec_counter 0,0,0,1,0
我想要的输出:
id <- c(10,10,10,10,10,11,11,11,11,11,11,12,12,12,13, 13, 15, 14)
counter <- c(0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1)
frec_counter <- c(0,0,0,1,0,1,0,2,0,0,3,0,0,1,0,0,1,1)
max_counter <- c(1,1,1,1,1,3,3,3,3,3,3,1,1,1,0,0,1,1)
DF <- data.frame(id, counter, frec_counter, max_counter); DF
答案 0 :(得分:1)
您的数据:
id <- c(10,10,10,10,10,11,11,11,11,11,11,12,12,12,13, 13, 15, 14)
counter <- c(0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,1)
DF <- data.frame(id, counter)
id counter
1 10 0
2 10 0
3 10 1
4 10 1
5 10 0
6 11 1
7 11 0
8 11 1
9 11 0
10 11 1
11 11 1
12 12 1
13 12 1
14 12 1
15 13 0
16 13 0
17 15 1
18 14 1
如果您想要的只是最终计数,我们可以在基数R中进行:
counts <- with(DF, split(counter, id))
lengths <- lapply(counts, rle)
final <- lapply(lengths, function(x) sum(x$values == 1))
$`10`
[1] 1
$`11`
[1] 3
$`12`
[1] 1
$`13`
[1] 0
$`14`
[1] 1
$`15`
[1] 1
但是,由于您特别想要带有中间“标志”的数据框,因此 tidyverse 一组软件包的效果更好:
library(tidyverse)
df.new <- DF %>%
group_by(id) %>%
mutate(
frec_counter = counter == 1 & (is.na(lead(counter)) | lead(counter == 0)),
frec_counter = as.numeric(frec_counter),
max_counter = sum(frec_counter)
)
# A tibble: 18 x 4
# Groups: id [6]
id counter frec_counter max_counter
<dbl> <dbl> <dbl> <dbl>
1 10 0 0 1
2 10 0 0 1
3 10 1 0 1
4 10 1 1 1
5 10 0 0 1
6 11 1 1 3
7 11 0 0 3
8 11 1 1 3
9 11 0 0 3
10 11 1 0 3
11 11 1 1 3
12 12 1 0 1
13 12 1 0 1
14 12 1 1 1
15 13 0 0 0
16 13 0 0 0
17 15 1 1 1
18 14 1 1 1
答案 1 :(得分:1)
这里是使用tidyverse的一种方法:
library(tidyverse)
DF %>%
group_by(id) %>% #group by id
mutate(one = ifelse(counter == lead(counter), 0, counter) #if the leading value is the same replace the value with 0
one = ifelse(is.na(one), counter, one), #to handle last in group where lead results in NA
frec_counter1 = cumsum(one), #get cumulative sum of 1s
frec_counter1 = ifelse(one == 0, 0 , frec_counter1), #replace the cumsum values with 0 where approprate
max_counter1 = max(frec_counter1)) %>% #get the max frec_counter1 per group
select(-one) #remove dummy variable
#output
id counter frec_counter max_counter frec_counter1 max_counter1
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 10 0 0 1 0 1
2 10 0 0 1 0 1
3 10 1 0 1 0 1
4 10 1 1 1 1 1
5 10 0 0 1 0 1
6 11 1 1 3 1 3
7 11 0 0 3 0 3
8 11 1 2 3 2 3
9 11 0 0 3 0 3
10 11 1 0 3 0 3
11 11 1 3 3 3 3
12 12 1 0 1 0 1
13 12 1 0 1 0 1
14 12 1 1 1 1 1
15 13 0 0 0 0 0
16 13 0 0 0 0 0
17 15 1 1 1 1 1
18 14 1 1 1 1 1