在 map_dbl 中使用 nth 提取第 n 个值

时间:2018-10-29 08:25:11

标签: r dplyr purrr

我正在尝试从 tibble 的嵌套列表列中提取第 n 个值,并希望使用 tidyverse 来完成。

我的原始数据是这样的

> x %>% 
+   select( ., 
+           meta_event, 
+           meta_aktivitet, 
+           meta_subject_id, 
+           time, 
+           time_cum, 
+           vo2) -> 
+ x
> dput(head(x))
structure(list(meta_event = c("001", "001", "001", "001", "001", 
"001"), meta_aktivitet = c("001", "001", "001", "001", "001", 
"001"), meta_subject_id = c("100001", "100001", "100001", "100001", 
"100001", "100001"), time = c("0:10 min", "0:15 min", "0:20 min", 
"0:25 min", "0:30 min", "0:35 min"), time_cum = c(10, 15, 20, 
25, 30, 35), vo2 = c(1.665, 2.515, 2.641, 2.677, 2.66, 2.712)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

然后用 nest() 对其进行嵌套:

> x %>% 
+   group_by( ., 
+             meta_event, 
+             meta_aktivitet, 
+             meta_subject_id) %>% 
+   nest() -> 
+ x_n
> head(x_n)
# A tibble: 6 x 4
  meta_event meta_aktivitet meta_subject_id data      
  <chr>      <chr>          <chr>           <list>    
1 001        001            100001          <tibble [~
2 001        001            100002          <tibble [~
3 001        001            100003          <tibble [~
4 001        001            100004          <tibble [~
5 001        001            100005          <tibble [~
6 001        001            100006          <tibble [~

在这里,我尝试通过 mutate 和 map_dbl 提取一些信息。我尝试了几种不同的写法,但都没有成功。

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1,
+           vald_start = map_dbl( data, ~ nth( "time_cum", 
+                                              n = index_1, 
+                                              order_by = "time_cum"))) %>%
+   unnest() %>% View()
Error in mutate_impl(.data, dots) : 
  Evaluation error: length(n) == 1 is not TRUE.
In addition: There were 20 warnings (use warnings() to see them)

似乎是在 mutate 的最后一项(vald_start)上失败了。如果去掉 vald_start 再运行代码,则会得到以下结果:

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1) %>%
+   unnest() ->
+ x_r
There were 20 warnings (use warnings() to see them)
> x_r
# A tibble: 10,639 x 10
   meta_event meta_aktivitet meta_subject_id vald_end    h1 index_1 index_2
   <chr>      <chr>          <chr>              <dbl> <dbl>   <dbl>   <dbl>
 1 001        001            100001              4015  3115     491     490
 2 001        001            100001              4015  3115     491     490
 3 001        001            100001              4015  3115     491     490
 4 001        001            100001              4015  3115     491     490
 5 001        001            100001              4015  3115     491     490
 6 001        001            100001              4015  3115     491     490
 7 001        001            100001              4015  3115     491     490
 8 001        001            100001              4015  3115     491     490
 9 001        001            100001              4015  3115     491     490
10 001        001            100001              4015  3115     491     490
# ... with 10,629 more rows, and 3 more variables: time <chr>,
#   time_cum <dbl>, vo2 <dbl>
> dput(head(x_r))
structure(list(meta_event = c("001", "001", "001", "001", "001", 
"001"), meta_aktivitet = c("001", "001", "001", "001", "001", 
"001"), meta_subject_id = c("100001", "100001", "100001", "100001", 
"100001", "100001"), vald_end = c(4015, 4015, 4015, 4015, 4015, 
4015), h1 = c(3115, 3115, 3115, 3115, 3115, 3115), index_1 = c(491, 
491, 491, 491, 491, 491), index_2 = c(490, 490, 490, 490, 490, 
490), time = c("0:10 min", "0:15 min", "0:20 min", "0:25 min", 
"0:30 min", "0:35 min"), time_cum = c(10, 15, 20, 25, 30, 35), 
    vo2 = c(1.665, 2.515, 2.641, 2.677, 2.66, 2.712)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

有人能就如何结合使用 nth 和 map_dbl 给我指个正确的方向吗?

根据 @Moody_Mudskipper 的建议,我尝试了以下代码,但仍然没有成功。我已将 nth 函数中的两个字符串 "time_cum" 改为 .$time_cum。

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1,
+           vald_start = data %>% map_dbl( ., ~ nth( .$time_cum, 
+                                              n = index_1, 
+                                              order_by = .$time_cum))) %>%
+   unnest() %>% View()
Error in mutate_impl(.data, dots) : 
  Evaluation error: length(n) == 1 is not TRUE.
In addition: There were 20 warnings (use warnings() to see them)

如果我修改代码,直接写入一个具体的数字(这里是 490)来代替对 index_1 的引用,那么 R 就能给出预期的结果。因此,一定是我引用 index_1 列的方式有问题。

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1,
+           vald_start = data %>% map_dbl( ., ~ nth( .$time_cum, 
+                                              n = 490, 
+                                              order_by = .$time_cum))) -> 
+ x_r
There were 20 warnings (use warnings() to see them)
> x_r
# A tibble: 21 x 9
   meta_event meta_aktivitet meta_subject_id data        vald_end    h1 index_1 index_2 vald_start
   <chr>      <chr>          <chr>           <list>         <dbl> <dbl>   <dbl>   <dbl>      <dbl>
 1 001        001            100001          <tibble [5~     4015  3115     491     490       2470
 2 001        001            100002          <tibble [5~     3770  2870     459     458       3575
 3 001        001            100003          <tibble [3~     3840  2940     346     345         NA
 4 001        001            100004          <tibble [5~     3630  2730     456     455       3455
 5 001        001            100005          <tibble [5~     3945  3045     493     492       2460
 6 001        001            100006          <tibble [5~     3860  2960     422     421       3660
 7 001        001            100007          <tibble [4~     3480  2580     356     355         NA
 8 002        001            100001          <tibble [5~     4570  3670     498     497       2450
 9 002        001            100002          <tibble [5~     3755  2855     424     423       3640
10 002        001            100003          <tibble [4~     3560  2660     361     360         NA
# ... with 11 more rows

来自丹麦的问候。

Dan Olesen

0 个答案:

没有答案