我使用purrr
和segmented
库来适应多个模型。我的目标是首先创建列表列,然后从中提取有用的变量。以下是示例数据框。
请注意,以下数据框只有1 Vehicle.ID2
。在原始数据集中,我有多个Vehicle.ID2
s。
> dput(fedf)
structure(list(Time = c(18.9, 19, 19.1, 19.2, 19.3, 19.4, 19.5,
19.6, 19.7, 19.8, 19.9, 20, 20.1, 20.2, 20.3, 20.4, 20.5, 20.6,
20.7, 20.8, 20.9, 21, 21.1, 21.2, 21.3, 21.4, 21.5, 21.6, 21.7,
21.8, 21.9, 22, 22.1, 22.2, 22.3, 22.4, 22.5, 22.6, 22.7, 22.8,
22.9, 23, 23.1, 23.2, 23.3, 23.4, 23.5, 23.6, 23.7, 23.8, 23.9,
24, 24.1, 24.2, 24.3, 24.4, 24.5, 24.6, 24.7, 24.8, 24.9, 25,
25.1, 25.2, 25.3, 25.4, 25.5, 25.6, 25.7, 25.8, 25.9, 26, 26.1,
26.2, 26.3, 26.4, 26.5, 26.6, 26.7, 26.8, 26.9, 27, 27.1, 27.2,
27.3, 27.4, 27.5, 27.6, 27.7, 27.8, 27.9, 28, 28.1, 28.2, 28.3,
28.4, 28.5, 28.6, 28.7, 28.8, 28.9, 29, 29.1, 29.2, 29.3, 29.4,
29.5, 29.6, 29.7, 29.8, 29.9, 30, 30.1, 30.2, 30.3, 30.4, 30.5,
30.6, 30.7, 30.8, 30.9), Vehicle.ID2 = c("11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1"), svel_mean = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.76472709090909, 4.77719854545454,
4.78953218181818, 4.80158254545454, 4.81301290909091, 4.82320690909091,
4.83122454545454, 4.83587545454545, 4.83594327272727, 4.83050254545455,
4.81921854545454, 4.80256563636364, 4.78185709090909, 4.758948,
4.73566690909091, 4.71328727272727, 4.69232381818182, 4.67267072727273,
4.65389381818182, 4.63546509090909, 4.61684309090909, 4.59742581818182,
4.57649418181818, 4.55325309090909, 4.52699290909091, 4.49726818181818,
4.46394654545455, 4.42708672727273, 4.38678090909091, 4.34312763636364,
4.29634472727273, 4.24687272727273, 4.19533527272727, 4.14238290909091,
4.08854727272727, 4.03419072727273, 3.97953490909091, 3.92470963636364,
3.86979, 3.81481781818182, 3.75981527272727, 3.704794, 3.64976254545455,
3.59472727272727, 3.53969090909091, 3.48465327272727, 3.42961290909091,
3.37456781818182, 3.31951545454545, 3.26445218181818, 3.20936963636364,
3.15425254545455, 3.09907545454545, 3.04379436363636, 2.98833436363636,
2.93256963636364, 2.87630054545454, 2.81926036363636, 2.76120945454545,
2.702132, 2.64245218181818, 2.58312109090909, 2.52547036363636,
2.47087418181818, 2.42039836363636, 2.37458763636364, 2.33338690909091,
2.29618309090909, 2.262014, 2.22988945454545, 2.19903090909091,
2.16892327272727, 2.13925218181818, 2.10983054545455, 2.08054945454545,
2.05134563636364, 2.02218490909091, 1.99305654545455, 1.96398472727273,
1.935058, 1.90647618181818, 1.87860145454545, 1.85197418181818,
1.82724109090909, 1.80498454545455, 1.785546, 1.76897709090909,
1.75517927272727, 1.74412, 1.73593218181818, 1.73079563636364,
1.72870563636364, 1.72933218181818, 1.73208145454545), dssvel = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0,
0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 2, 0)), .Names = c("Time", "Vehicle.ID2", "svel_mean",
"dssvel"), row.names = c(NA, -121L), class = c("tbl_df", "tbl",
"data.frame"))
segf2_1 <- function(df){
try(segmented(lm(svel_mean ~ Time, data=df), seg.Z = ~Time,
psi = list(Time = df$Time[which(df$dssvel != 0)])#,
#control = seg.control(seed=2)
),
silent=TRUE)
}
library(dplyr)
library(segmented)
library(purrr)
library(tidyr)
## Nesting the data frame --------------------
by_veh <- fedf %>%
group_by(Vehicle.ID2) %>%
nest()
## Applying the function ---------------------
modelz <- by_veh %>%
mutate(segs = map(data, segf2_1))
## Getting time, acceeration and speed ---------
m <- modelz %>%
mutate(time_at_action = segs %>% map(function(x) round(x$psi[,2],0)),
action_accel = map(segs, function(x) slope(x)[[1]][,1]),
fitted_speed = map(segs, function(x) fitted(x))
)
数据框m
有4个列表列,其中包含有用的数据。
我的数据集中基本上需要两个新列:每个段之间的拟合速度和恒定加速度。因此,我的预期输出如下:
> dput(data_set)
structure(list(Vehicle.ID2 = c("11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1", "11.1",
"11.1", "11.1", "11.1", "11.1", "11.1"), Time = c(18.9, 19, 19.1,
19.2, 19.3, 19.4, 19.5, 19.6, 19.7, 19.8, 19.9, 20, 20.1, 20.2,
20.3, 20.4, 20.5, 20.6, 20.7, 20.8, 20.9, 21, 21.1, 21.2, 21.3,
21.4, 21.5, 21.6, 21.7, 21.8, 21.9, 22, 22.1, 22.2, 22.3, 22.4,
22.5, 22.6, 22.7, 22.8, 22.9, 23, 23.1, 23.2, 23.3, 23.4, 23.5,
23.6, 23.7, 23.8, 23.9, 24, 24.1, 24.2, 24.3, 24.4, 24.5, 24.6,
24.7, 24.8, 24.9, 25, 25.1, 25.2, 25.3, 25.4, 25.5, 25.6, 25.7,
25.8, 25.9, 26, 26.1, 26.2, 26.3, 26.4, 26.5, 26.6, 26.7, 26.8,
26.9, 27, 27.1, 27.2, 27.3, 27.4, 27.5, 27.6, 27.7, 27.8, 27.9,
28, 28.1, 28.2, 28.3, 28.4, 28.5, 28.6, 28.7, 28.8, 28.9, 29,
29.1, 29.2, 29.3, 29.4, 29.5, 29.6, 29.7, 29.8, 29.9, 30, 30.1,
30.2, 30.3, 30.4, 30.5, 30.6, 30.7, 30.8, 30.9), svel_mean = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.76472709090909, 4.77719854545454,
4.78953218181818, 4.80158254545454, 4.81301290909091, 4.82320690909091,
4.83122454545454, 4.83587545454545, 4.83594327272727, 4.83050254545455,
4.81921854545454, 4.80256563636364, 4.78185709090909, 4.758948,
4.73566690909091, 4.71328727272727, 4.69232381818182, 4.67267072727273,
4.65389381818182, 4.63546509090909, 4.61684309090909, 4.59742581818182,
4.57649418181818, 4.55325309090909, 4.52699290909091, 4.49726818181818,
4.46394654545455, 4.42708672727273, 4.38678090909091, 4.34312763636364,
4.29634472727273, 4.24687272727273, 4.19533527272727, 4.14238290909091,
4.08854727272727, 4.03419072727273, 3.97953490909091, 3.92470963636364,
3.86979, 3.81481781818182, 3.75981527272727, 3.704794, 3.64976254545455,
3.59472727272727, 3.53969090909091, 3.48465327272727, 3.42961290909091,
3.37456781818182, 3.31951545454545, 3.26445218181818, 3.20936963636364,
3.15425254545455, 3.09907545454545, 3.04379436363636, 2.98833436363636,
2.93256963636364, 2.87630054545454, 2.81926036363636, 2.76120945454545,
2.702132, 2.64245218181818, 2.58312109090909, 2.52547036363636,
2.47087418181818, 2.42039836363636, 2.37458763636364, 2.33338690909091,
2.29618309090909, 2.262014, 2.22988945454545, 2.19903090909091,
2.16892327272727, 2.13925218181818, 2.10983054545455, 2.08054945454545,
2.05134563636364, 2.02218490909091, 1.99305654545455, 1.96398472727273,
1.935058, 1.90647618181818, 1.87860145454545, 1.85197418181818,
1.82724109090909, 1.80498454545455, 1.785546, 1.76897709090909,
1.75517927272727, 1.74412, 1.73593218181818, 1.73079563636364,
1.72870563636364, 1.72933218181818, 1.73208145454545), dssvel = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0,
0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 2, 0), fitted_speed = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 4.86921514029415, 4.85760307362944, 4.84599100696473,
4.83437894030001, 4.82276687363529, 4.81115480697058, 4.79954274030586,
4.78793067364114, 4.77631860697643, 4.76470654031171, 4.75309447364699,
4.74148240698228, 4.72987034031756, 4.71825827365285, 4.70664620698813,
4.69503414032341, 4.6834220736587, 4.67181000699398, 4.66019794032926,
4.64858587366455, 4.63697380699983, 4.62536174033511, 4.6137496736704,
4.60213760700568, 4.59052554034097, 4.56141666682718, 4.50709477451488,
4.45277288220258, 4.39845098989029, 4.34412909757799, 4.28980720526569,
4.23548531295339, 4.1811634206411, 4.1268415283288, 4.0725196360165,
4.0181977437042, 3.96387585139191, 3.90955395907961, 3.85523206676731,
3.80091017445502, 3.74658828214272, 3.69226638983042, 3.63794449751812,
3.58362260520583, 3.52930071289353, 3.47497882058123, 3.42065692826894,
3.36633503595664, 3.31201314364434, 3.25769125133204, 3.20336935901975,
3.14904746670745, 3.09472557439515, 3.04040368208286, 2.98608178977056,
2.93175989745826, 2.87743800514596, 2.82311611283367, 2.76879422052137,
2.71447232820907, 2.66015043589677, 2.60582854358448, 2.55150665127218,
2.49718475895988, 2.44286286664759, 2.38854097433529, 2.33421908202299,
2.27989718971069, 2.2255752973984, 2.18528965396358, 2.16277983827573,
2.14027002258788, 2.11776020690003, 2.09525039121218, 2.07274057552433,
2.05023075983648, 2.02772094414863, 2.00521112846078, 1.98270131277293,
1.96019149708508, 1.93768168139723, 1.91517186570939, 1.89266205002154,
1.87015223433369, 1.84764241864584, 1.82513260295799, 1.80262278727014,
1.78011297158229, 1.75760315589444, 1.73509334020659, 1.71258352451874,
1.69007370883089, 1.66756389314304, 1.64505407745519), action_accel = c(-0.1161,
-0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161,
-0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161,
-0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161,
-0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161,
-0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161,
-0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161,
-0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161, -0.1161,
-0.1161, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432,
-0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432,
-0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432,
-0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432,
-0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432,
-0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.5432, -0.2251,
-0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251,
-0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251,
-0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251,
-0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251, -0.2251,
-0.2251)), row.names = c(NA, -121L), .Names = c("Vehicle.ID2",
"Time", "svel_mean", "dssvel", "fitted_speed", "action_accel"
), class = "data.frame")
我使用以下代码来获得我想要的输出。但这很痛苦。我确信必须采用更直接的方法(类似于broom::glance
和broom::tidy
)。请帮我改进这段代码。
library(tidyr)
## Unnesting -----------
time_at_action_df <- unnest(m, time_at_action)
action_accel_df <- unnest(m, action_accel)
fitted_speed_df <- unnest(m, fitted_speed)
data_set <- unnest(m, data)
## First point in time for each pair ----
du <- data_set %>%
group_by(Vehicle.ID2) %>%
summarise(psi_first = head(Time, 1)) %>%
ungroup()
## Action Points ------
time_at_action_df <- time_at_action_df %>%
left_join(x = ., y = du) %>%
group_by(Vehicle.ID2) %>%
do(data.frame(time_at_action = c(unique(.$psi_first), .$time_at_action ))) %>%
ungroup()
## Accelerations/Decelerations and action points -----
action_accel_df <- action_accel_df %>%
arrange(Vehicle.ID2)
action_accel_df <- cbind(action_accel_df, time_at_action = time_at_action_df$time_at_action)
## Combining fitted speeds with original data ------
fitted_speed_df <- fitted_speed_df %>%
group_by(Vehicle.ID2) %>%
do(data.frame(fitted_speed = c(rep(NA,27), .$fitted_speed))) %>%
ungroup()
## Arranging ----
data_set <- data_set %>%
arrange(Vehicle.ID2)
data_set <- cbind(data_set, fitted_speed_df)
data_set <- data_set[,-5]
## Combining slope with original data ---
data_set <- action_accel_df %>%
left_join(x=data_set, y = .,
by = c('Vehicle.ID2' = 'Vehicle.ID2',
'Time' = 'time_at_action')) %>%
fill(action_accel, .direction = "down")
答案 0 :(得分:1)
这是不是很难编码&#39;你的解决方案?之前有人因为split
使用purrr
而对我大喊大叫。代码中断length(time_at_action) != 2
,但我确信这不是一个难以克服的修复挑战。应该推广到多个Vehicle.ID2
。
library(dplyr)
library(segmented)
library(purrr)
library(tidyr)
data = fedf %>% split(., .$Vehicle.ID2)
model = data %>% map(segf2_1)
param = model %>% map( ~ {list(time_at_action = round(.$psi[, 2], 0),
action_accel = slope(.)[[1]][, 1],
fitted_speed = fitted(.))})
dfexp2 = pmap(.l = list(data, model, param),
.f = function(x, y, z) {
x %>% mutate(fitted_speed = c(rep(NA, times = sum(is.na(x$svel_mean))), z$fitted),
time = ifelse(
test = Time < z$time_at_action[1],
yes = z$action_accel[[1]],
no = ifelse(test = (Time > z$time_at_action[1]) & (Time < z$time_at_action[2]),
yes = z$action_accel[[2]],
no = z$action_accel[[3]])))})
dfexp2 %>% bind_rows()
编辑:没有硬编码的新代码:
dfexp2 = pmap(.l = list(data, model, param),
.f = function(x, y, z) {
x %>% mutate(fitted_speed = c(rep(NA, times = sum(is.na(x$svel_mean))), z$fitted),
time = z$action_accel[cut(Time ,c(min(Time), z$time_at_action, max(Time)), right = F)])})