如何在按 ID 分组(group_by)的基础上计算目标列的连续比例

时间:2019-05-08 11:49:24

标签: r datatable dplyr plyr

我正在尝试按 ID 分组,计算 target 列的连续(累计)比例。

数据集

# Example data: two IDs (11 and 22) with ten binary `target` observations each.
df <- data.frame(
  ID = rep(c(11, 22), each = 10),
  target = c(
    0, 0, 0, 1, 1, 1, 0, 1, 1, 1, # ID 11
    0, 0, 1, 1, 1, 0, 1, 0, 1, 1  # ID 22
  )
)
   ID target
1  11      0
2  11      0
3  11      0
4  11      1
5  11      1
6  11      1
7  11      0
8  11      1
9  11      1
10 11      1
11 22      0
12 22      0
13 22      1
14 22      1
15 22      1
16 22      0
17 22      1
18 22      0
19 22      1
20 22      1

这是我尝试过的:

# Attempt from the question: per-ID counters, then the ratio of `val` to the
# row index. `val` falls back to 0 on every target == 0 row, which is the
# behaviour the question is asking how to change.
df <- df %>%
  group_by(ID) %>%
  mutate(
    # Row index within the current ID group.
    count_per_ID = row_number(),
    # Position inside the current run of equal target values; restarts at 1
    # whenever target switches between 0 and 1.
    consecutive_target = sequence(rle(as.character(target))$lengths),
    # Run position on target != 0 rows, 0 on target == 0 rows.
    val = ifelse(target == 0, 0, consecutive_target),
    proportion_target_by_ID = val / count_per_ID
  ) %>%
  ungroup()
  • 我创建了 count_per_ID,用来记录每个 ID 组内截至当前行的行数(即组内行号)。
  • 然后,consecutive_target 列统计 target 列中连续相同取值的个数,每当取值发生变化时重新从 1 开始计数。我所说的变化是指 target 在 0 和 1 之间切换。
  • val 根据 target 的取值决定:target 为 1 时取 consecutive_target 的值,target 为 0 时取 0。
  • proportion_target_by_ID 等于 val 除以 count_per_ID。

问题在于,当 val 列的值为 0 时(即 target 为 0 的行),按 ID 计算目标比例的做法就不成立了:val 被重置为 0,而不是延续此前已累计的 1 的个数。

      ID target count_per_ID consecutive_target   val proportion_target_by_ID
   <dbl>  <dbl>        <int>              <int> <dbl>                   <dbl>
 1    11      0            1                  1     0                   0    
 2    11      0            2                  2     0                   0    
 3    11      0            3                  3     0                   0    
 4    11      1            4                  1     1                   0.25 
 5    11      1            5                  2     2                   0.4  
 6    11      1            6                  3     3                   0.5  
 7    11      0            7                  1     0                   0    
 8    11      1            8                  1     1                   0.125
 9    11      1            9                  2     2                   0.222
10    11      1           10                  3     3                   0.3  
11    22      0            1                  1     0                   0    
12    22      0            2                  2     0                   0    
13    22      1            3                  1     1                   0.333
14    22      1            4                  2     2                   0.5  
15    22      1            5                  3     3                   0.6  
16    22      0            6                  1     0                   0    
17    22      1            7                  1     1                   0.143
18    22      0            8                  1     0                   0    
19    22      1            9                  1     1                   0.111
20    22      1           10                  2     2                   0.2  

结果应如下所示:

      ID target count_per_ID consecutive_target   val proportion_target_by_ID
   <dbl>  <dbl>        <int>              <int> <dbl>                   <dbl>
 1    11      0            1                  1     0                   0    
 2    11      0            2                  2     0                   0    
 3    11      0            3                  3     0                   0    
 4    11      1            4                  1     1                   0.25 
 5    11      1            5                  2     2                   0.4  
 6    11      1            6                  3     3                   0.5  
 7    11      0            7                  1     3                   0.428    
 8    11      1            8                  1     4                   0.5
 9    11      1            9                  2     5                   0.555
10    11      1           10                  3     6                   0.6  
11    22      0            1                  1     0                   0    
12    22      0            2                  2     0                   0    
13    22      1            3                  1     1                   0.333
14    22      1            4                  2     2                   0.5  
15    22      1            5                  3     3                   0.6  
16    22      0            6                  1     3                   0.5    
17    22      1            7                  1     4                   0.571
18    22      0            8                  1     4                   0.5    
19    22      1            9                  1     5                   0.55
20    22      1           10                  2     6                   0.6  

1 个答案:

答案 0 :(得分:0)

一个选项是修改用于创建 “val” 的代码,把

val = ifelse(target == 0, 0, consecutive_target)

改为

val = cumsum(target != 0)

-完整代码

# Same steps as the attempt in the question, except for `val`: it is now the
# running count of non-zero targets within each ID, so it keeps its value on
# target == 0 rows instead of dropping back to 0.
# There is no ungroup() here, so the printed tibble is still grouped by ID.
df %>%
  group_by(ID) %>%
  mutate(
    count_per_ID = row_number(),
    consecutive_target = sequence(rle(as.character(target))$lengths),
    val = cumsum(target != 0),
    proportion_target_by_ID = val / count_per_ID
  )
# A tibble: 20 x 6
# Groups:   ID [2]
#      ID target count_per_ID consecutive_target   val proportion_target_by_ID
#   <dbl>  <dbl>        <int>              <int> <int>                   <dbl>
# 1    11      0            1                  1     0                   0    
# 2    11      0            2                  2     0                   0    
# 3    11      0            3                  3     0                   0    
# 4    11      1            4                  1     1                   0.25 
# 5    11      1            5                  2     2                   0.4  
# 6    11      1            6                  3     3                   0.5  
# 7    11      0            7                  1     3                   0.429
# 8    11      1            8                  1     4                   0.5  
# 9    11      1            9                  2     5                   0.556
#10    11      1           10                  3     6                   0.6  
#11    22      0            1                  1     0                   0    
#12    22      0            2                  2     0                   0    
#13    22      1            3                  1     1                   0.333
#14    22      1            4                  2     2                   0.5  
#15    22      1            5                  3     3                   0.6  
#16    22      0            6                  1     3                   0.5  
#17    22      1            7                  1     4                   0.571
#18    22      0            8                  1     4                   0.5  
#19    22      1            9                  1     5                   0.556
#20    22      1           10                  2     6                   0.6