按 id 和类型分组,当时间差小于某个值时求和

时间:2017-10-16 10:48:43

标签: r dataframe time datatable purrr

关于这个问题:Sum if the date difference is smaller than a value。多亏了 @Davis Vaughan,我现在可以计算过去 12 小时内发生的事件数量:

# Sample events: one row per event with its timestamp (fechayhora), the
# entity it belongs to (id) and the event type (tipo).
# The timestamps are parsed into POSIXct, and minus_12 holds the start of the
# 12-hour look-back window that ends at each event.
df <- tribble(
  ~fechayhora,        ~id,       ~tipo,
  "2017-03-17 08:03:00", "A",  "APF",
  "2017-05-17 10:34:00", "A",  "APF",
  "2017-05-17 12:52:00", "A",  "APF",
  "2017-05-17 08:52:00", "A",  "APP",
  "2017-05-17 10:52:00", "A",  "APP",
  "2017-05-17 10:46:00", "B",  "APP",
  "2017-05-17 14:23:00", "B",  "APP",
  "2017-05-17 17:29:00", "B",  "APF"
) %>%
  mutate(
    fechayhora = as.POSIXct(fechayhora),
    minus_12 = fechayhora - hours(12)
  )

# For every event, count how many other rows of df fall inside its 12-hour
# window [minus_12, fechayhora]. The event itself always lies in its own
# window, hence the "- 1". No filtering on id or tipo happens here, so the
# count covers all rows regardless of the column name.
df <- mutate(
  df,
  number_of_APF_12h = map2_dbl(
    fechayhora,
    minus_12,
    function(window_end, window_start) {
      sum(between(df$fechayhora, window_start, window_end)) - 1
    }
  )
)

然后我尝试做同样的事情,但要按 "id" 和 "tipo"(类型)分组。我分别用 data.table 和 data.frame 试过,都没有成功:

# Attempt 1 (reported above as not working): data.table-style grouping.
# NOTE(review): the parentheses are unbalanced (one extra ")" before ",by=")
# and the new column is assigned with "=" in j -- data.table presumably needs
# ":=" here; confirm. df$fechayhora also refers to the whole table rather
# than to the rows of the current (tipo, id) group.
df=df[,number_of_failures_12h = map2_dbl(.x = fechayhora, 
                               .y = minus_12, 
                               .f = ~sum(between(df$fechayhora, .y, .x)) - 
1)),by=.(tipo,id)]

# Attempt 2 (reported above as not working): dplyr group_by() + mutate().
# NOTE(review): the statement is already complete after group_by(id,tipo),
# so the following line that begins with %>% does not parse; the pipe has to
# sit at the end of the previous line. In addition, df$fechayhora inside the
# lambda is the full, ungrouped column, so the count would ignore the grouping.
df <- df %>%
group_by(id,tipo)
%>% mutate(
  number_of_failure = map2_dbl(.x = fechayhora, 
                               .y = minus_12, 
                               .f = ~sum(between(df$fechayhora, .y, .x)) - 
1)) %>%
ungroup()

预期结果:

   fechayhora             id    tipo      n_APP   n_APF
   "2017-03-17 08:03:00", "A",  "APF",    0       0 
   "2017-05-17 10:34:00", "A",  "APF",    0       1
   "2017-05-17 12:52:00", "A",  "APF",    0       2
   "2017-05-17 08:52:00", "A",  "APP",    0       2
   "2017-05-17 10:52:00", "A",  "APP",    1       2  
   "2017-05-17 10:46:00", "B",  "APP",    0       0
   "2017-05-17 14:23:00", "B",  "APP",    1       0
   "2017-05-17 17:29:00", "B",  "APF"     0       0

谢谢!

1 个答案:

答案 0 :(得分:0)

肯定有更优雅的方式,但下面这样应该可行:

# Auxiliary function: count the events of the given type(s) that come before
# row `rowid` in `group` and whose timestamp is at or after `last_12`.
#
# Arguments:
#   group    data frame with columns `tipo` and `fechayhora`; the rows above
#            `rowid` are treated as the earlier events, so the caller is
#            expected to pass it sorted by time.
#   last_12  lower bound of the look-back window.
#   rowid    1-based position of the current row within `group`.
#   type     event type(s) to count, matched against `tipo`.
#
# Returns the number of matching earlier rows as a single integer.
count_failures <- function(group, last_12, rowid, type) {
  # seq_len() gives an empty index for the first row. The former `1:rowid-1`
  # parsed as `(1:rowid) - 1` and only worked because a zero index is
  # silently dropped when subsetting.
  earlier <- seq_len(rowid - 1L)
  in_window <- group$tipo[earlier] %in% type &
    group$fechayhora[earlier] >= last_12
  # Rows with a missing timestamp are not counted.
  sum(in_window, na.rm = TRUE)
}

# Split df into a plain (unnamed) list holding one data frame per id.
# Base split() replaces the former do()/select()/map(identity)/.[[1]] chain,
# which only served to pull the per-group data frames out of a list column.
# Note: rows whose id is NA are not assigned to any piece by split().
split_by_group <- unname(split(df, df$id))

# For each per-id data frame: sort by time, then for every row count the
# earlier APP and APF events of that id that fall inside the row's 12-hour
# window (via count_failures), and finally stack the pieces back together.
df_s <- bind_rows(
  map(split_by_group, function(events) {
    sorted <- arrange(events, fechayhora)
    # rowid gives each row its position so count_failures() can look only at
    # the rows above it; rowwise() makes minus_12 and rowid scalars per call.
    counted <- mutate(
      rowwise(rowid_to_column(sorted)),
      n_APP = count_failures(sorted, minus_12, rowid, "APP"),
      n_APF = count_failures(sorted, minus_12, rowid, "APF")
    )
    select(ungroup(counted), -rowid)
  })
)

输出:

# A tibble: 8 x 6
           fechayhora    id  tipo            minus_12 n_APP n_APF
               <dttm> <chr> <chr>              <dttm> <int> <int>
1 2017-03-17 08:03:00     A   APF 2017-03-16 20:03:00     0     0
2 2017-05-17 08:52:00     A   APP 2017-05-16 20:52:00     0     0
3 2017-05-17 10:34:00     A   APF 2017-05-16 22:34:00     1     0
4 2017-05-17 10:52:00     A   APP 2017-05-16 22:52:00     1     1
5 2017-05-17 12:52:00     A   APF 2017-05-17 00:52:00     2     1
6 2017-05-17 10:46:00     B   APP 2017-05-16 22:46:00     0     0
7 2017-05-17 14:23:00     B   APP 2017-05-17 02:23:00     1     0
8 2017-05-17 17:29:00     B   APF 2017-05-17 05:29:00     2     0