此处提供了过滤后的数据集:
ID Date Location Method Lines Session_Number Start_Session End_Session
1 1257 2017-02-02 FSZ5 Trolling 2 1 07:11 <NA>
2 1258 2017-02-02 FSZ5 Trolling 2 1 07:11 <NA>
3 1259 2017-02-02 FSZ5 Trolling 2 1 07:11 07:49
4 1260 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
5 1261 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
6 1262 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
7 1263 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
93 1349 2017-03-26 FSZ1 Bottom 3 3 18:28 18:23
94 1350 2017-03-26 FSZ1 Bottom 3 3 18:28 18:23
95 1351 2017-03-26 FSZ1 Bottom 3 3 18:28 18:45
Session_Length Species
1 NA Aprion virescens
2 NA Euthynnus affinis
3 NA <NA>
4 NA Epinephelus multinotatus
5 NA Caranx melampygus
6 NA Caranx melampygus
7 NA Lutjanus bohar
93 NA Epinephelus multinotatus
94 NA Lethrinus olivaceus
95 NA <NA>
# Ten-row reproducible sample of the catch log (looks like dput() output).
# Start_Session / End_Session are "HH:MM" character vectors and End_Session
# contains NA; Lines and Session_Number are stored as character.
# Session_Length is all NA: it is the column the question wants filled in.
stack.example <- structure(list(ID = c(1257L, 1258L, 1259L, 1260L, 1261L, 1262L,1263L, 1349L, 1350L, 1351L), Date = structure(c(17199, 17199,17199, 17199, 17199, 17199, 17199, 17251, 17251, 17251), class = "Date"),Location = structure(c(5L, 5L, 5L, 6L, 6L, 6L, 6L, 1L, 1L,1L), .Label = c("FSZ1", "FSZ2", "FSZ3", "FSZ4", "FSZ5", "FSZ6","Other location"), class = "factor"), Method = structure(c(2L,2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Bottom","Trolling"), class = "factor"), Lines = c("2", "2", "2","5", "5", "5", "5", "3", "3", "3"), Session_Number = c("1","1", "1", "2", "2", "2", "2", "3", "3", "3"), Start_Session = c("07:11","07:11", "07:11", "08:05", "08:05", "08:05", "08:05", "18:28","18:28", "18:28"), End_Session = c(NA, NA, "07:49", "07:49","07:49", "07:49", "07:49", "18:23", "18:23", "18:45"), Session_Length = c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA), Species = structure(c(3L,13L, NA, 10L, 5L, 5L, 24L, 10L, 19L, NA), .Label = c("Acanthocybium solandri","Aethaloperca rogaa", "Aprion virescens", "Caranx ignobilis","Caranx melampygus", "Cephalopholis sonnerati", "Elagatis bipinnulata","Epinephelus fasciatus", "Epinephelus fuscoguttatus", "Epinephelus multinotatus","Epinephelus polyphekadion", "Epinephelus tukula", "Euthynnus affinis","Gymnosarda unicolor", "Lethrinus conchyliatus", "Lethrinus lentjan","Lethrinus microdon", "Lethrinus nebulosus", "Lethrinus olivaceus","Lethrinus rubrioperculatus", "Lethrinus variegatus", "Lutjanidae spp.","Lutjanus bengalensis", "Lutjanus bohar", "Lutjanus rivulatus","Momotaxis gradoculis", "Other species", "Plectropomus pessuliferus","Sphyraena barracuda", "Thunnus albacares", "Variola louti"), class = "factor")), .Names = c("ID", "Date", "Location","Method", "Lines", "Session_Number", "Start_Session", "End_Session","Session_Length", "Species"), row.names = c(1L, 2L, 3L, 4L, 5L,6L, 7L, 93L, 94L, 95L), class = "data.frame")
我想得到一列 Session_Length,用 Start_Session 和 End_Session 两列计算每个会话的时长(以分钟为单位)。
以分钟为单位的时长应与特定的日期(Date)、位置(Location)、方法(Method)和 Session_Number 相对应。这看起来很简单。
我遇到的困难是:数据集中每条捕获的鱼所在的行,其会话开始和结束时间都不正确;只有在每个钓鱼会话底部生成的汇总行例外,它给出了该钓鱼会话正确的开始和结束时间(原始数据由 CyberTracker 导出)。
所以,我正在尝试:
示例:2017-02-02 这一天,在 Session_Number 1 中用 Trolling(拖钓)方式捕获了 2 条鱼。
该会话末尾会生成一行汇总行,给出正确的开始时间 07:11 和结束时间 07:49。
因此,对于在 Session_Number 1 钓鱼期间捕获的 2 条鱼,其所在行的 Session_Length 值应为 38(分钟,即 07:49 减去 07:11)。
我可以手动处理,但数据有 1000 多条观测,会花不少时间。有人能提供实现这一点的方法吗?
我正在考虑 dplyr
- group_by
功能,但无法将它拼凑在一起。
如果还能在计算并填入 Session_Length 值之后删除汇总行,那就锦上添花了。
Start_Session 和 End_Session 两列是字符(character)类型,并且含有 NA 值。
编辑新样本数据
# Larger reproducible sample: 95 rows, IDs 1257 through 1351, same ten columns
# as stack.example. Start_Session / End_Session are "HH:MM" strings (End_Session
# contains NA) and Session_Length is all NA.
# NOTE(review): Lines, Session_Number and Session_Length are built with
# structure(<character or NA vector>, class = "integer" / "character"); the
# printed output in the answer shows Lines and Session_Number as <chr>, so these
# class tags look like an export artefact -- confirm before relying on the types.
stack.example2 <- structure(list(ID = structure(1257:1351, class = "integer"),Date = structure(c(17199, 17199, 17199, 17199, 17199, 17199,17199, 17199, 17199, 17199, 17199, 17199, 17199, 17199, 17199,17199, 17199, 17199, 17199, 17199, 17199, 17199, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17232,17232, 17232, 17232, 17232, 17232, 17232, 17232, 17232, 17232,17232, 17232, 17250, 17250, 17250, 17250, 17250, 17250, 17250,17250, 17250, 17250, 17251, 17251, 17251, 17251, 17251, 17251,17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251,17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251), class = "Date"),Location = structure(c(5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("FSZ1","FSZ2", "FSZ3", "FSZ4", "FSZ5", "FSZ6", "Other location"), class = "factor"),Method = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Bottom","Trolling"), class = "factor"),
Lines = structure(c("2","2", "2", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5","5", "5", "5", "5", "5", "5", "5", "5", "2", "2", "2", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", "2","2", "2", "2", "2", "2", "2", "2", "2", "2", "4", "4", "4","4", "4", "7", "7", "7", "7", "7", "7", "2", "2", "2", "3","3", "3", "3", "2", "2", "2", "2", "2", "3", "3", "3", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "3"), class = "integer"),Session_Number = structure(c("1", "1", "1", "2", "2", "2","2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2","2", "2", "2", "3", "1", "1", "2", "2", "2", "3", "4", "4","4", "4", "5", "5", "5", "6", "6", "6", "6", "6", "7", "8","8", "8", "8", "8", "9", "9", "10", "10", "1", "1", "1","1", "2", "2", "3", "3", "4", "4", "4", "5", "1", "2", "2","2", "2", "2", "2", "3", "3", "4", "1", "1", "1", "1", "2","2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3","3", "3", "3", "3", "3", "3"), class = "integer"),
Start_Session = structure(c("07:11","07:11", "07:11", "08:05", "08:05", "08:05", "08:05", "08:05","08:05", "08:05", "08:05", "08:05", "08:05", "08:05", "08:05","08:05", "08:05", "08:05", "08:05", "08:05", "08:05", "10:31","07:19", "07:19", "07:29", "07:29", "07:29", "07:57", "08:08","08:08", "08:08", "08:08", "08:23", "08:23", "08:23", "08:36","08:36", "08:36", "08:36", "08:36", "08:52", "09:06", "09:06","09:06", "09:06", "09:06", "09:27", "09:27", "09:46", "09:46","10:47", "10:47", "10:47", "10:47", "11:03", "11:03", "11:51","11:51", "12:31", "12:31", "12:31", "12:48", "16:54", "17:08","17:08", "17:08", "17:08", "17:08", "17:08", "17:51", "17:51","18:13", "18:18", "18:18", "18:18", "18:18", "18:28", "18:28","18:28", "18:28", "18:28", "18:28", "18:28", "18:28", "18:28","18:28", "18:28", "18:28", "18:28", "18:28", "18:28", "18:28","18:28", "18:28", "18:28"), class = "character"),
End_Session = structure(c(NA,NA, "07:49", "07:49", "07:49", "07:49", "07:49", "07:49","07:49", "07:49", "07:49", "07:49", "07:49", "07:49", "07:49","07:49", "07:49", "07:49", "07:49", "07:49", "10:30", "10:41",NA, "07:28", "07:28", "07:28", "07:47", "08:08", "08:08","08:08", "08:08", "08:17", "08:17", "08:17", "08:34", "08:34","08:34", "08:34", "08:34", "08:51", "09:03", "09:03", "09:03","09:03", "09:03", "09:26", "09:26", "09:38", "09:38", "10:34","10:34", "10:34", "10:34", "11:03", "11:03", "11:51", "11:51","12:09", "12:09", "12:09", "12:47", "13:03", "17:03", "17:03","17:03", "17:03", "17:03", "17:03", "17:44", "17:44", "18:12","18:27", "18:27", "18:27", "18:27", "18:23", "18:23", "18:23","18:23", "18:23", "18:23", "18:23", "18:23", "18:23", "18:23","18:23", "18:23", "18:23", "18:23", "18:23", "18:23", "18:23","18:23", "18:23", "18:45"), class = "character"), Session_Length = structure(c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA), class = "character"),
Species = structure(c(3L,13L, NA, 10L, 5L, 5L, 24L, 8L, 11L, 3L, 10L, 3L, 25L, 9L,24L, 10L, 2L, 11L, 31L, 12L, NA, NA, 19L, NA, 18L, 18L, NA,NA, 18L, 10L, 18L, NA, 6L, 19L, NA, 3L, 6L, 3L, 18L, NA,NA, 6L, 18L, 18L, 18L, NA, 6L, NA, 7L, NA, 4L, 4L, 29L, NA,1L, NA, 4L, NA, 10L, 10L, NA, NA, NA, 10L, 24L, 12L, 12L,24L, NA, 4L, NA, NA, 24L, 24L, 31L, NA, 14L, 1L, 1L, 1L,30L, 20L, 20L, 20L, 24L, 24L, 24L, 31L, 31L, 31L, 10L, 10L,10L, 19L, NA), .Label = c("Acanthocybium solandri", "Aethaloperca rogaa","Aprion virescens", "Caranx ignobilis", "Caranx melampygus","Cephalopholis sonnerati", "Elagatis bipinnulata", "Epinephelus fasciatus","Epinephelus fuscoguttatus", "Epinephelus multinotatus","Epinephelus polyphekadion", "Epinephelus tukula", "Euthynnus affinis","Gymnosarda unicolor", "Lethrinus conchyliatus", "Lethrinus lentjan","Lethrinus microdon", "Lethrinus nebulosus", "Lethrinus olivaceus","Lethrinus rubrioperculatus", "Lethrinus variegatus", "Lutjanidae spp.","Lutjanus bengalensis", "Lutjanus bohar", "Lutjanus rivulatus","Momotaxis gradoculis", "Other species", "Plectropomus pessuliferus","Sphyraena barracuda", "Thunnus albacares", "Variola louti"), class = "factor")), .Names = c("ID", "Date", "Location","Method", "Lines", "Session_Number", "Start_Session", "End_Session","Session_Length", "Species"), class = "data.frame", row.names = c(NA,-95L))
答案 0 :(得分:1)
这是一种可能的方法:
library(tidyverse)
library(lubridate)
# Compute each session's length in minutes and drop the per-session summary
# row. Relies on row order: the summary row (the one carrying the correct
# Start_Session / End_Session) must be the last row of every
# Location / Date / Method / Session_Number group.
stack.example %>%
  # Glue the date onto the "HH:MM" strings and parse them as date-times.
  # Rows whose time is NA cannot be parsed and come back as NA.
  mutate(
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/"))
  ) %>%
  group_by(Location, Date, Method, Session_Number) %>%
  # Copy the last row's (summary row's) times onto every row of the group.
  mutate(
    End_Session = tail(End_Session, 1),
    Start_Session = tail(Start_Session, 1)
  ) %>%
  # Pin the unit to minutes. A bare `End_Session - Start_Session` lets
  # difftime pick the unit from the data, so the same number could mean
  # seconds, minutes or hours depending on the input.
  mutate(
    Session_Length = difftime(End_Session, Start_Session, units = "mins")
  ) %>%
  # Remove the last (summary) row of each group.
  filter(row_number() != n())
#output
# A tibble: 7 x 10
# Groups: Location, Date, Method, Session_Number [3]
ID Date Location Method Lines Session_Number Start_Session End_Session Session_Length Species
<int> <date> <fct> <fct> <chr> <chr> <dttm> <dttm> <time> <fct>
1 1257 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 38 Aprion virescens
2 1258 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 38 Euthynnus affinis
3 1260 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 07:49:00 -16 Epinephelus multinotatus
4 1261 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 07:49:00 -16 Caranx melampygus
5 1262 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 07:49:00 -16 Caranx melampygus
6 1349 2017-03-26 FSZ1 Bottom 3 3 2017-03-26 18:28:00 2017-03-26 18:45:00 17 Epinephelus multinotatus
7 1350 2017-03-26 FSZ1 Bottom 3 3 2017-03-26 18:28:00 2017-03-26 18:45:00 17 Lethrinus olivaceus
首先将时间转换为 POSIXct,然后按 Location、Date、Method 和 Session_Number 分组。在每个组中,将 Start_Session 和 End_Session 的值替换为该组最后一行的值。最后计算会话时长 Session_Length,并删除每个组的最后一行(汇总行)。
这个结果看起来没有多大意义(例如出现了负的时长),但我认为这是因为问题中给出的样本行选得不巧。
更新:使用新数据。我添加了一行,使用 kimisc 包中的 seconds.to.hms 将 Session_Length 从秒转换为 H:M:S 格式。出现的警告信息只是在提示数据中存在 NA 值。
library(kimisc)
# Same pipeline on the larger sample, with Session_Length rendered as
# "HH:MM:SS". Relies on the summary row being the last row of each
# Location / Date / Method / Session_Number group.
stack.example2 %>%
  # Glue the date onto the "HH:MM" strings and parse them as date-times.
  # Rows whose time is NA cannot be parsed and come back as NA.
  mutate(
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/"))
  ) %>%
  group_by(Location, Date, Method, Session_Number) %>%
  # Copy the last row's (summary row's) times onto every row of the group.
  mutate(
    End_Session = tail(End_Session, 1),
    Start_Session = tail(Start_Session, 1)
  ) %>%
  mutate(
    # seconds.to.hms() needs a plain number of seconds. A bare
    # `End_Session - Start_Session` lets difftime choose the unit, so the
    # unit is fixed to seconds here before stripping the difftime class.
    Session_Length = as.numeric(
      difftime(End_Session, Start_Session, units = "secs")
    ),
    Session_Length = seconds.to.hms(Session_Length)
  ) %>%
  # Remove the last (summary) row of each group.
  filter(row_number() != n())
#output
# A tibble: 70 x 10
# Groups: Location, Date, Method, Session_Number [19]
ID Date Location Method Lines Session_Number Start_Session End_Session Session_Length Species
<int> <date> <fct> <fct> <chr> <chr> <dttm> <dttm> <chr> <fct>
1 1257 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 00:38:00 Aprion ~
2 1258 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 00:38:00 Euthynn~
3 1260 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
4 1261 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Caranx ~
5 1262 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Caranx ~
6 1263 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Lutjanu~
7 1264 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
8 1265 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
9 1266 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Aprion ~
10 1267 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
如果您想避免发出警告,请执行以下操作:
# Variant that takes the summary row's time strings first and parses them
# afterwards, so the NA strings on the non-summary rows are never handed to
# the parser. Relies on the summary row being the last row of each group.
stack.example2 %>%
  group_by(Location, Date, Method, Session_Number) %>%
  mutate(
    # Copy the last row's (summary row's) "HH:MM" strings onto every row.
    End_Session = tail(End_Session, 1),
    Start_Session = tail(Start_Session, 1),
    # Glue the date on and parse to date-times.
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/")),
    # seconds.to.hms() needs a plain number of seconds, so fix the difftime
    # unit explicitly instead of letting `-` choose it from the data.
    Session_Length = as.numeric(
      difftime(End_Session, Start_Session, units = "secs")
    ),
    Session_Length = seconds.to.hms(Session_Length)
  ) %>%
  # Remove the last (summary) row of each group.
  filter(row_number() != n())
编辑:
# Final variant: as above, but sessions that consist of a single row are kept
# instead of being removed together with their "summary" row.
# Relies on the summary row being the last row of each
# Location / Date / Method / Session_Number group.
stack.example2 %>%
  group_by(Location, Date, Method, Session_Number) %>%
  mutate(
    # Copy the last row's (summary row's) "HH:MM" strings onto every row.
    End_Session = tail(End_Session, 1),
    Start_Session = tail(Start_Session, 1),
    # Glue the date on and parse to date-times.
    End_Session = ymd_hm(paste(Date, End_Session, sep = "/")),
    Start_Session = ymd_hm(paste(Date, Start_Session, sep = "/")),
    # seconds.to.hms() needs a plain number of seconds, so fix the difftime
    # unit explicitly instead of letting `-` choose it from the data.
    Session_Length = as.numeric(
      difftime(End_Session, Start_Session, units = "secs")
    ),
    Session_Length = seconds.to.hms(Session_Length)
  ) %>%
  # Keep a group's only row; otherwise drop its last (summary) row.
  # n() is evaluated per group, so no helper column is needed.
  filter(n() == 1 | row_number() != n()) %>%
  # Drop the grouping so later verbs operate on the whole table again.
  ungroup()