# Sample data: 21 port-call rows for two vessels ("STI Selatar", "STI Amber"),
# stored as a tibble.
# Columns: Vessel (chr), VoyNum (int), Port (chr), Function (factor, 14 levels),
# Date_Arrival / Date_Departure (POSIXct, tzone "UTC"), BallastLaden (int 0/1),
# Miles (dbl).
# NOTE(review): the timestamp 978307200 (2001-01-01 00:00:00 UTC) recurs in both
# date columns and looks like a placeholder for a missing date -- confirm.
test <- structure(list(Vessel = c("STI Selatar", "STI Selatar", "STI Selatar",
"STI Selatar", "STI Selatar", "STI Selatar", "STI Selatar", "STI Selatar",
"STI Selatar", "STI Selatar", "STI Selatar", "STI Selatar", "STI Amber",
"STI Amber", "STI Amber", "STI Amber", "STI Amber", "STI Amber",
"STI Amber", "STI Amber", "STI Amber"), VoyNum = c(14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 62L, 62L, 62L,
62L, 62L, 62L, 62L, 62L, 62L), Port = c("ROTTERDAM", "ROTTERDAM",
"ENGLISH CHANNEL", "GIBRALTAR", "PIRAEUS", "ELEUSIS", "ELEUSIS",
"AGIOI THEODOROI", "SUEZ CANAL", "SINGAPORE STRAIT", "YOSU",
"DAESAN", "BOTANY BAY", "NAPA NAPA", "NAPA NAPA", "PNGLNG TERMINAL",
"TORRES STRAIT", "SINGAPORE STRAIT", "SINGAPORE", "SINGAPORE STRAIT",
"TOLO HARBOUR"), Function = structure(c(2L, 14L, 8L, 8L, 5L,
14L, 6L, 6L, 1L, 8L, 4L, 4L, 2L, 14L, 6L, 6L, 1L, 8L, 4L, 8L,
4L), .Label = c("Canal Transit", "Commencing", "Delivery", "Discharging",
"Fuelling", "Loading", "Other", "Passing", "Port Call Cancelled",
"Redelivery", "Repair", "Sampling", "Terminating", "Waiting"), class = "factor"),
Date_Arrival = structure(c(978307200, 1535198400, 978307200,
978307200, 1537237440, 1537696800, 1537736460, 1538161920,
1539118440, 978307200, 1541531520, 1541736000, 978307200,
1520194320, 1520258460, 1520654400, 1520800560, 978307200,
1521556560, 978307200, 1522321200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), Date_Departure = structure(c(1535196600,
1536346080, 978307200, 978307200, 1537688160, 1537736340,
1538155080, 1538938800, 1539190800, 978307200, 1541621880,
1541979000, 1519702560, 1520258340, 1520642520, 1520740800,
1520837280, 978307200, 1521894960, 978307200, 1522461600), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), BallastLaden = c(0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), Miles = c(4, 2930, 0, 0, 24, 0.43, 23, 623, 7466,
0, 338, 0, 1720, 0.43, 43, 213, 2594, 0, 1424, 0, 0)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame"))
`Miles` 变量表示当前行与下一行之间的距离。`VoyNum` 是船只的航次编号。`Function` 变量包含开始航次、装货和卸货等信息。为了确定某一航段属于哪个月份,我先求出 `Date_Departure` 与 `Date_Arrival` 之间的时间间隔,取其一半,再加到 `Date_Departure` 上。以下是期望的输出:
# A tibble: 8 x 7
Vessel VoyNum FromPort ToPort Miles BallastLaden Month
<chr> <int> <chr> <chr> <dbl> <int> <chr>
1 STI Selatar 14 ROTTERDAM ELEUSIS 2958. 0 Sep
2 STI Selatar 14 ELEUSIS AGIOI THEODOROI 23 1 Sep
3 STI Selatar 14 AGIOI THEODOROI YOSU 8089 1 Oct
4 STI Selatar 14 YOSU DAESAN 338 1 Nov
5 STI Amber 62 BOTANY BAY NAPA NAPA 1720. 0 Mar
6 STI Amber 62 NAPA NAPA PNGLNG TERMINAL 43 1 Mar
7 STI Amber 62 PNGLNG TERMINAL SINGAPORE 2807 1 Mar
8 STI Amber 62 SINGAPORE TOLO HARBOUR 1424 1 Mar
如何使用 dplyr 的 `group_by()` 和 `summarize()` 来实现?
我可以像下面这样汇总压载和重载的总里程,但这并不是我真正需要的,因为每个航段还会进一步细分为多个港口:
# Total miles per vessel and voyage, split by the BallastLaden flag
# (BMiles sums rows with BallastLaden == 0, LMiles rows with BallastLaden == 1).
# Use the bare column name: inside summarise() `Miles` is the current group's
# slice, whereas `test$Miles` is the whole 21-row column and would be indexed
# with a group-sized logical vector (silently recycled, giving wrong sums).
test %>%
  group_by(Vessel, VoyNum) %>%
  summarise(
    BMiles = sum(Miles[BallastLaden == 0]),
    LMiles = sum(Miles[BallastLaden == 1])
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest too.
  ungroup()
答案 0 :(得分:0)
这是一个 tidyverse 解决方案:
# Collapse the port-call log into one row per leg, where a leg runs between
# consecutive "Commencing" / "Loading" / "Discharging" calls of a voyage.
library(dplyr)      # %>%, group_by(), mutate(), lead(), last(), slice(), ...
library(lubridate)  # month()

test %>%
  # Group on both keys so equal voyage numbers of different vessels stay apart.
  group_by(Vessel, VoyNum) %>%
  # NOTE(review): arrange() ignores the grouping here, and rows carrying the
  # 2001-01-01 placeholder timestamp sort to the front. That is harmless only
  # while those rows have Miles == 0, as they do in `test` -- confirm for
  # other data.
  arrange(Date_Departure) %>%
  mutate(cum_miles = cumsum(Miles)) %>%
  filter(Function %in% c("Commencing", "Loading", "Discharging")) %>%
  mutate(
    # cum_miles - Miles is the distance sailed before each kept call;
    # differencing it gives the miles from this kept call to the next one.
    Miles = c(diff(cum_miles - Miles), last(Miles)),
    ToPort = lead(Port),
    # Month of the leg's midpoint: departure plus half the time until the
    # next kept call's arrival. lead() yields NA on the last row (dropped
    # below) instead of mixing POSIXct with a bare numeric in c(), and the
    # explicit units keep the arithmetic independent of difftime's
    # data-dependent automatic unit choice.
    Month = month(
      Date_Departure +
        difftime(lead(Date_Arrival), Date_Departure, units = "secs") / 2,
      label = TRUE
    )
  ) %>%
  rename(FromPort = Port) %>%
  # The last kept call of each voyage has no onward leg.
  slice(-n()) %>%
  ungroup() %>%
  select(Vessel, VoyNum, FromPort, ToPort, Miles, BallastLaden, Month)
# Result as originally posted (no "Groups" header because of ungroup()).
# NOTE(review): row order may depend on the dplyr version -- confirm by re-running.
# A tibble: 8 x 7
#   Vessel      VoyNum FromPort        ToPort          Miles BallastLaden Month
#   <chr>        <int> <chr>           <chr>           <dbl>        <int> <ord>
# 1 STI Selatar     14 ROTTERDAM       ELEUSIS         2958.            0 Sep
# 2 STI Selatar     14 ELEUSIS         AGIOI THEODOROI   23             1 Sep
# 3 STI Selatar     14 AGIOI THEODOROI YOSU            8089             1 Oct
# 4 STI Selatar     14 YOSU            DAESAN           338             1 Nov
# 5 STI Amber       62 BOTANY BAY      NAPA NAPA       1720.            0 Mar
# 6 STI Amber       62 NAPA NAPA       PNGLNG TERMINAL   43             1 Mar
# 7 STI Amber       62 PNGLNG TERMINAL SINGAPORE       2807             1 Mar
# 8 STI Amber       62 SINGAPORE       TOLO HARBOUR    1424             1 Mar