如何结合tidyr中不需要的结果?

时间:2016-06-09 07:08:20

标签: r tidyr purrr

数据

x <- c(1:10, 13:22)
y <- numeric(20)
## Create first segment
y[1:10] <- 20:11 + rnorm(10, 0, 1.5)
## Create second segment
y[11:20] <- seq(11, 15, len=10) + rnorm(10, 0, 1.5)

目的

应用segmented函数进行分段线性回归。我在第一次tidyr::nest数据集然后使用purrr包之后就这样做了。最后,我unnested获得所需的输出。以下是代码:

df <- data.frame(o = "A", x = x, y= y)

library(tidyr)
library(dplyr)

by_o <- df %>% 
  group_by(o) %>% 
  nest()



segf <- function(df){
  require(segmented)
  segmented(lm(y~x, data=df), seg.Z = ~x, psi=14,
            control = seg.control(seed = 2))
}

library(purrr)

models <- by_o %>% 
  mutate(segs = data %>% map(segf))

m <- models %>% mutate(psi = segs %>% map(function(x) round(x$psi[2],0)),
                       slo = map(segs, function(x) slope(x)[[1]][,1]))


up <- unnest(m, psi)
us <- unnest(m, slo)
ud <- unnest(m, data)

期望输出:

基本上,psix之后的slope更改。所以,我想要以下输出:

> dput(ud)
structure(list(o = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "A", class = "factor"), 
    x = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 13L, 14L, 
    15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L), y = c(18.8337487576471, 
    19.7196093890392, 17.9379671924293, 14.7675434512467, 16.4457014421767, 
    15.2094056495268, 10.9797139781902, 14.9949617420451, 12.6078427839913, 
    8.96774220196406, 12.1399686562958, 11.4098925289, 12.0982423698874, 
    13.6885980881852, 13.0854885243419, 11.1517028034879, 13.2448581873284, 
    14.438512104517, 14.6206728457974, 14.0299957736482), slope = c(-0.9909, 
    -0.9909, -0.9909, -0.9909, -0.9909, -0.9909, -0.9909, -0.9909, 
    -0.9909, -0.9909, 0.3146, 0.3146, 0.3146, 0.3146, 0.3146, 
    0.3146, 0.3146, 0.3146, 0.3146, 0.3146)), .Names = c("o", 
"x", "y", "slope"), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

如何以上述方式合并这3个数据集(upusud)?请注意df是玩具数据集。原始df有数百o s,即A,B,......这就是我将xy合并到df的原因

1 个答案:

答案 0 :(得分:0)

我不确定这是否是最好的方式,但以下是为我工作的:

du <- ud %>% 
  group_by(Vehicle.ID2) %>% 
  summarise(psi_last = tail(Time, 1)) %>% 
  ungroup()


up <- up %>% 
  left_join(x = ., y = du) %>% 
  group_by(Vehicle.ID2) %>% 
  do(data.frame(psi = c(.$psi, unique(.$psi_last)))) %>% 
  ungroup()

us <- us %>% 
  arrange(Vehicle.ID2) %>% 
  mutate(psi = up$psi)

ud <- us %>% 
  right_join(ud, by = c('Vehicle.ID2' = 'Vehicle.ID2', 'psi' = 'Time')) %>% 
  fill(slo, .direction = "up")