根据其他列

时间:2018-04-05 07:43:25

标签: r dplyr conditional-statements

此处提供了过滤后的数据集:

     ID       Date Location   Method Lines Session_Number Start_Session End_Session
1  1257 2017-02-02     FSZ5 Trolling     2              1         07:11        <NA>
2  1258 2017-02-02     FSZ5 Trolling     2              1         07:11        <NA>
3  1259 2017-02-02     FSZ5 Trolling     2              1         07:11       07:49
4  1260 2017-02-02     FSZ6   Bottom     5              2         08:05       07:49
5  1261 2017-02-02     FSZ6   Bottom     5              2         08:05       07:49
6  1262 2017-02-02     FSZ6   Bottom     5              2         08:05       07:49
7  1263 2017-02-02     FSZ6   Bottom     5              2         08:05       07:49
93 1349 2017-03-26     FSZ1   Bottom     3              3         18:28       18:23
94 1350 2017-03-26     FSZ1   Bottom     3              3         18:28       18:23
95 1351 2017-03-26     FSZ1   Bottom     3              3         18:28       18:45
   Session_Length                  Species
1              NA         Aprion virescens
2              NA        Euthynnus affinis
3              NA                     <NA>
4              NA Epinephelus multinotatus
5              NA        Caranx melampygus
6              NA        Caranx melampygus
7              NA           Lutjanus bohar
93             NA Epinephelus multinotatus
94             NA      Lethrinus olivaceus
95             NA                     <NA>

stack.example <- structure(list(ID = c(1257L, 1258L, 1259L, 1260L, 1261L, 1262L,1263L, 1349L, 1350L, 1351L), Date = structure(c(17199, 17199,17199, 17199, 17199, 17199, 17199, 17251, 17251, 17251), class = "Date"),Location = structure(c(5L, 5L, 5L, 6L, 6L, 6L, 6L, 1L, 1L,1L), .Label = c("FSZ1", "FSZ2", "FSZ3", "FSZ4", "FSZ5", "FSZ6","Other location"), class = "factor"), Method = structure(c(2L,2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Bottom","Trolling"), class = "factor"), Lines = c("2", "2", "2","5", "5", "5", "5", "3", "3", "3"), Session_Number = c("1","1", "1", "2", "2", "2", "2", "3", "3", "3"), Start_Session = c("07:11","07:11", "07:11", "08:05", "08:05", "08:05", "08:05", "18:28","18:28", "18:28"), End_Session = c(NA, NA, "07:49", "07:49","07:49", "07:49", "07:49", "18:23", "18:23", "18:45"), Session_Length = c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA), Species = structure(c(3L,13L, NA, 10L, 5L, 5L, 24L, 10L, 19L, NA), .Label = c("Acanthocybium solandri","Aethaloperca rogaa", "Aprion virescens", "Caranx ignobilis","Caranx melampygus", "Cephalopholis sonnerati", "Elagatis bipinnulata","Epinephelus fasciatus", "Epinephelus fuscoguttatus", "Epinephelus multinotatus","Epinephelus polyphekadion", "Epinephelus tukula", "Euthynnus affinis","Gymnosarda unicolor", "Lethrinus conchyliatus", "Lethrinus lentjan","Lethrinus microdon", "Lethrinus nebulosus", "Lethrinus olivaceus","Lethrinus rubrioperculatus", "Lethrinus variegatus", "Lutjanidae spp.","Lutjanus bengalensis", "Lutjanus bohar", "Lutjanus rivulatus","Momotaxis gradoculis", "Other species", "Plectropomus pessuliferus","Sphyraena barracuda", "Thunnus albacares", "Variola louti"), class = "factor")), .Names = c("ID", "Date", "Location","Method", "Lines", "Session_Number", "Start_Session", "End_Session","Session_Length", "Species"), row.names = c(1L, 2L, 3L, 4L, 5L,6L, 7L, 93L, 94L, 95L), class = "data.frame") 

我正在尝试得到一列 Session_Length,它根据 Start_Session 和 End_Session 两列计算出会话持续的分钟数。

以分钟为单位的长度应与特定的日期(Date)、位置(Location)、方法(Method)和 Session_Number 相对应。看起来很简单。

我遇到困难的地方在于:数据集中每条捕获的鱼所在的行,其会话开始和结束时间都不正确;唯一的例外是在每个钓鱼会话底部生成的汇总行,它提供了该钓鱼会话正确的开始和结束时间(原始数据由 CyberTracker 导出)。

所以,我正在尝试

  • 使用正确的开始和结束时间获取相应会话的底行值
  • 计算出时间长度(以分钟为单位)
  • 将值应用于与日期,位置,方法和Session_Number相关的特定钓鱼会话中每一行的Session_Length列

示例

2017-02-02 使用 Trolling 方法捕获了 2 条鱼

随后生成一行汇总行,为 Session_Number 1 提供开始时间 07:11 和结束时间 07:49

因此,对于在该钓鱼会话期间捕获的 2 条鱼,Session_Number 1 各行的 Session_Length 值应为 38(07:11 到 07:49 共 38 分钟)

我可以手动完成,但数据有大约 1000 条观测,会花不少时间。有人能提供一种实现方法吗?我在考虑使用 dplyr 的 group_by 功能,但没能把它拼凑起来。

如果能在计算并应用 Session_Length 值之后再把汇总行删除,那就更好了。

Start_Session 和 End_Session 列是字符类型,并且存在 NA 值。

编辑新样本数据

stack.example2 <- structure(list(ID = structure(1257:1351, class = "integer"),Date = structure(c(17199, 17199, 17199, 17199, 17199, 17199,17199, 17199, 17199, 17199, 17199, 17199, 17199, 17199, 17199,17199, 17199, 17199, 17199, 17199, 17199, 17199, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17232,17232, 17232, 17232, 17232, 17232, 17232, 17232, 17232, 17232,17232, 17232, 17250, 17250, 17250, 17250, 17250, 17250, 17250,17250, 17250, 17250, 17251, 17251, 17251, 17251, 17251, 17251,17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251,17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251), class = "Date"),Location = structure(c(5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("FSZ1","FSZ2", "FSZ3", "FSZ4", "FSZ5", "FSZ6", "Other location"), class = "factor"),Method = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Bottom","Trolling"), class = "factor"),
                 Lines = structure(c("2","2", "2", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5","5", "5", "5", "5", "5", "5", "5", "5", "2", "2", "2", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", "2","2", "2", "2", "2", "2", "2", "2", "2", "2", "4", "4", "4","4", "4", "7", "7", "7", "7", "7", "7", "2", "2", "2", "3","3", "3", "3", "2", "2", "2", "2", "2", "3", "3", "3", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "3"), class = "integer"),Session_Number = structure(c("1", "1", "1", "2", "2", "2","2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2","2", "2", "2", "3", "1", "1", "2", "2", "2", "3", "4", "4","4", "4", "5", "5", "5", "6", "6", "6", "6", "6", "7", "8","8", "8", "8", "8", "9", "9", "10", "10", "1", "1", "1","1", "2", "2", "3", "3", "4", "4", "4", "5", "1", "2", "2","2", "2", "2", "2", "3", "3", "4", "1", "1", "1", "1", "2","2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3","3", "3", "3", "3", "3", "3"), class = "integer"),
                 Start_Session = structure(c("07:11","07:11", "07:11", "08:05", "08:05", "08:05", "08:05", "08:05","08:05", "08:05", "08:05", "08:05", "08:05", "08:05", "08:05","08:05", "08:05", "08:05", "08:05", "08:05", "08:05", "10:31","07:19", "07:19", "07:29", "07:29", "07:29", "07:57", "08:08","08:08", "08:08", "08:08", "08:23", "08:23", "08:23", "08:36","08:36", "08:36", "08:36", "08:36", "08:52", "09:06", "09:06","09:06", "09:06", "09:06", "09:27", "09:27", "09:46", "09:46","10:47", "10:47", "10:47", "10:47", "11:03", "11:03", "11:51","11:51", "12:31", "12:31", "12:31", "12:48", "16:54", "17:08","17:08", "17:08", "17:08", "17:08", "17:08", "17:51", "17:51","18:13", "18:18", "18:18", "18:18", "18:18", "18:28", "18:28","18:28", "18:28", "18:28", "18:28", "18:28", "18:28", "18:28","18:28", "18:28", "18:28", "18:28", "18:28", "18:28", "18:28","18:28", "18:28", "18:28"), class = "character"),
                 End_Session = structure(c(NA,NA, "07:49", "07:49", "07:49", "07:49", "07:49", "07:49","07:49", "07:49", "07:49", "07:49", "07:49", "07:49", "07:49","07:49", "07:49", "07:49", "07:49", "07:49", "10:30", "10:41",NA, "07:28", "07:28", "07:28", "07:47", "08:08", "08:08","08:08", "08:08", "08:17", "08:17", "08:17", "08:34", "08:34","08:34", "08:34", "08:34", "08:51", "09:03", "09:03", "09:03","09:03", "09:03", "09:26", "09:26", "09:38", "09:38", "10:34","10:34", "10:34", "10:34", "11:03", "11:03", "11:51", "11:51","12:09", "12:09", "12:09", "12:47", "13:03", "17:03", "17:03","17:03", "17:03", "17:03", "17:03", "17:44", "17:44", "18:12","18:27", "18:27", "18:27", "18:27", "18:23", "18:23", "18:23","18:23", "18:23", "18:23", "18:23", "18:23", "18:23", "18:23","18:23", "18:23", "18:23", "18:23", "18:23", "18:23", "18:23","18:23", "18:23", "18:45"), class = "character"), Session_Length = structure(c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA), class = "character"),
                 Species = structure(c(3L,13L, NA, 10L, 5L, 5L, 24L, 8L, 11L, 3L, 10L, 3L, 25L, 9L,24L, 10L, 2L, 11L, 31L, 12L, NA, NA, 19L, NA, 18L, 18L, NA,NA, 18L, 10L, 18L, NA, 6L, 19L, NA, 3L, 6L, 3L, 18L, NA,NA, 6L, 18L, 18L, 18L, NA, 6L, NA, 7L, NA, 4L, 4L, 29L, NA,1L, NA, 4L, NA, 10L, 10L, NA, NA, NA, 10L, 24L, 12L, 12L,24L, NA, 4L, NA, NA, 24L, 24L, 31L, NA, 14L, 1L, 1L, 1L,30L, 20L, 20L, 20L, 24L, 24L, 24L, 31L, 31L, 31L, 10L, 10L,10L, 19L, NA), .Label = c("Acanthocybium solandri", "Aethaloperca rogaa","Aprion virescens", "Caranx ignobilis", "Caranx melampygus","Cephalopholis sonnerati", "Elagatis bipinnulata", "Epinephelus fasciatus","Epinephelus fuscoguttatus", "Epinephelus multinotatus","Epinephelus polyphekadion", "Epinephelus tukula", "Euthynnus affinis","Gymnosarda unicolor", "Lethrinus conchyliatus", "Lethrinus lentjan","Lethrinus microdon", "Lethrinus nebulosus", "Lethrinus olivaceus","Lethrinus rubrioperculatus", "Lethrinus variegatus", "Lutjanidae spp.","Lutjanus bengalensis", "Lutjanus bohar", "Lutjanus rivulatus","Momotaxis gradoculis", "Other species", "Plectropomus pessuliferus","Sphyraena barracuda", "Thunnus albacares", "Variola louti"), class = "factor")), .Names = c("ID", "Date", "Location","Method", "Lines", "Session_Number", "Start_Session", "End_Session","Session_Length", "Species"), class = "data.frame", row.names = c(NA,-95L))

1 个答案:

答案 0 :(得分:1)

这是一种可能的方法:

library(tidyverse)
library(lubridate)

# Compute every fishing session's length in minutes from the session's last
# (summary) row, copy it to all rows of the session, then drop that last row.
stack.example %>%
  # Prefix the "HH:MM" strings with the date so they parse as date-times.
  # NA times cannot be parsed; they become NA and lubridate warns about them.
  mutate(
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/"))
  ) %>%
  group_by(Location, Date, Method, Session_Number) %>%
  mutate(
    # Use the start/end times found on the last row of each session.
    Start_Session = last(Start_Session),
    End_Session = last(End_Session),
    # Request minutes explicitly: `End_Session - Start_Session` lets difftime
    # pick secs/mins/hours on its own, so the number would not reliably be
    # minutes.
    Session_Length = difftime(End_Session, Start_Session, units = "mins")
  ) %>%
  # Drop the last row of each session. The result is still grouped; call
  # ungroup() before any further work that should ignore the grouping.
  filter(row_number() != n())
#output
# A tibble: 7 x 10
# Groups: Location, Date, Method, Session_Number [3]
     ID Date       Location Method   Lines Session_Number Start_Session       End_Session         Session_Length Species                 
  <int> <date>     <fct>    <fct>    <chr> <chr>          <dttm>              <dttm>              <time>         <fct>                   
1  1257 2017-02-02 FSZ5     Trolling 2     1              2017-02-02 07:11:00 2017-02-02 07:49:00 38             Aprion virescens        
2  1258 2017-02-02 FSZ5     Trolling 2     1              2017-02-02 07:11:00 2017-02-02 07:49:00 38             Euthynnus affinis       
3  1260 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 07:49:00 -16            Epinephelus multinotatus
4  1261 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 07:49:00 -16            Caranx melampygus       
5  1262 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 07:49:00 -16            Caranx melampygus       
6  1349 2017-03-26 FSZ1     Bottom   3     3              2017-03-26 18:28:00 2017-03-26 18:45:00 17             Epinephelus multinotatus
7  1350 2017-03-26 FSZ1     Bottom   3     3              2017-03-26 18:28:00 2017-03-26 18:45:00 17             Lethrinus olivaceus   

首先将时间转换为 POSIXct,然后进行分组。在每个组中,将 Start 和 End 值替换为该组最后一行的值。计算时长(length)并删除每个组的最后一行。结果看起来没有多大意义,但我相信这是因为问题中提供的样本行选得不巧。

更新:使用新数据。我添加了一行,使用 kimisc 包中的 seconds.to.hms 将 Session_Length 从秒转换为 H:M:S。警告信息(warning message)只是告诉你存在 NA 值。

library(kimisc)
# Session length per fishing session on the larger sample, shown as H:M:S.
stack.example2 %>%
  # Prefix the "HH:MM" strings with the date so they parse as date-times.
  # NA times cannot be parsed; they become NA and lubridate warns about them.
  mutate(
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/"))
  ) %>%
  group_by(Location, Date, Method, Session_Number) %>%
  mutate(
    # Use the start/end times found on the last row of each session.
    Start_Session = last(Start_Session),
    End_Session = last(End_Session),
    # seconds.to.hms() converts from seconds, so hand it a plain number of
    # seconds. `End_Session - Start_Session` would let difftime choose
    # secs/mins/hours automatically and the value could be misread.
    Session_Length = seconds.to.hms(
      as.numeric(difftime(End_Session, Start_Session, units = "secs"))
    )
  ) %>%
  # Drop the last row of each session. The result is still grouped; call
  # ungroup() before any further work that should ignore the grouping.
  filter(row_number() != n())
#output
# A tibble: 70 x 10
# Groups: Location, Date, Method, Session_Number [19]
      ID Date       Location Method   Lines Session_Number Start_Session       End_Session         Session_Length Species 
   <int> <date>     <fct>    <fct>    <chr> <chr>          <dttm>              <dttm>              <chr>          <fct>   
 1  1257 2017-02-02 FSZ5     Trolling 2     1              2017-02-02 07:11:00 2017-02-02 07:49:00 00:38:00       Aprion ~
 2  1258 2017-02-02 FSZ5     Trolling 2     1              2017-02-02 07:11:00 2017-02-02 07:49:00 00:38:00       Euthynn~
 3  1260 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Epineph~
 4  1261 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Caranx ~
 5  1262 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Caranx ~
 6  1263 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Lutjanu~
 7  1264 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Epineph~
 8  1265 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Epineph~
 9  1266 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Aprion ~
10  1267 2017-02-02 FSZ6     Bottom   5     2              2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00       Epineph~

如果您想避免发出警告,请执行以下操作:

# Variant that picks each session's last start/end strings first and parses
# them afterwards, so the NA times on earlier rows are never parsed.
stack.example2 %>%
  group_by(Location, Date, Method, Session_Number) %>%
  mutate(
    # Still character "HH:MM" values here: take the ones on the last row.
    End_Session = last(End_Session),
    Start_Session = last(Start_Session),
    # Prefix with the date so the strings parse as date-times.
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/")),
    # seconds.to.hms() converts from seconds, so hand it a plain number of
    # seconds. `End_Session - Start_Session` would let difftime choose
    # secs/mins/hours automatically and the value could be misread.
    Session_Length = seconds.to.hms(
      as.numeric(difftime(End_Session, Start_Session, units = "secs"))
    )
  ) %>%
  # Drop the last row of each session. The result is still grouped; call
  # ungroup() before any further work that should ignore the grouping.
  filter(row_number() != n())

编辑:

# Final variant: as above, but a session consisting of a single row keeps
# that row instead of losing it when the last row of each session is dropped.
stack.example2 %>%
  group_by(Location, Date, Method, Session_Number) %>%
  mutate(
    # Still character "HH:MM" values here: take the ones on the last row.
    End_Session = last(End_Session),
    Start_Session = last(Start_Session),
    # Prefix with the date so the strings parse as date-times.
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/")),
    # seconds.to.hms() converts from seconds, so hand it a plain number of
    # seconds. `End_Session - Start_Session` would let difftime choose
    # secs/mins/hours automatically and the value could be misread.
    Session_Length = seconds.to.hms(
      as.numeric(difftime(End_Session, Start_Session, units = "secs"))
    )
  ) %>%
  # Keep one-row sessions whole; otherwise drop the session's last row.
  # n() is used directly, so no temporary helper column is needed.
  filter(n() == 1 | row_number() != n()) %>%
  # Return an ungrouped result so later verbs are not silently grouped.
  ungroup()