我有这个df
我希望添加一个新列:对于每一个有观察值的行,把紧邻它的前一行和后一行(目前为 NA)也填充为相同的观察值。
我尝试过,但没有成功。
以下是我现有的 df 示例,dffilled 是我想要得到的结果。
dput(df)
structure(list(Date = structure(c(1L, 12L, 23L, 26L, 27L, 28L,
29L, 30L, 31L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L, 32L, 43L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L,
53L), .Label = c("5/1/2012", "5/10/2012", "5/11/2012", "5/12/2012",
"5/13/2012", "5/14/2012", "5/15/2012", "5/16/2012", "5/17/2012",
"5/18/2012", "5/19/2012", "5/2/2012", "5/20/2012", "5/21/2012",
"5/22/2012", "5/23/2012", "5/24/2012", "5/25/2012", "5/26/2012",
"5/27/2012", "5/28/2012", "5/29/2012", "5/3/2012", "5/30/2012",
"5/31/2012", "5/4/2012", "5/5/2012", "5/6/2012", "5/7/2012",
"5/8/2012", "5/9/2012", "6/1/2012", "6/10/2012", "6/11/2012",
"6/12/2012", "6/13/2012", "6/14/2012", "6/15/2012", "6/16/2012",
"6/17/2012", "6/18/2012", "6/19/2012", "6/2/2012", "6/20/2012",
"6/21/2012", "6/22/2012", "6/23/2012", "6/24/2012", "6/25/2012",
"6/26/2012", "6/27/2012", "6/28/2012", "6/29/2012", "6/3/2012",
"6/4/2012", "6/5/2012", "6/6/2012", "6/7/2012", "6/8/2012", "6/9/2012"
), class = "factor"), Obs = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 320L, NA, NA,
NA, NA, NA, NA, NA, NA, 321L, 321L, 322L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 323L, NA, NA, NA, NA, 324L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 325L, NA)), .Names = c("Date", "Obs"), class = "data.frame", row.names = c(NA,
-60L))
我希望最终的数据框为 dffilled,其中紧邻观察日期之前和之后的行也填充为该观察值。
dput(dffilled)
structure(list(Date = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 34L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 35L), .Label = c("5/10/2012",
"5/11/2012", "5/12/2012", "5/13/2012", "5/14/2012", "5/15/2012",
"5/16/2012", "5/17/2012", "5/18/2012", "5/19/2012", "5/20/2012",
"5/21/2012", "5/22/2012", "5/23/2012", "5/24/2012", "5/25/2012",
"5/26/2012", "5/27/2012", "5/28/2012", "5/29/2012", "5/30/2012",
"5/31/2012", "6/1/2012", "6/10/2012", "6/11/2012", "6/12/2012",
"6/13/2012", "6/14/2012", "6/15/2012", "6/16/2012", "6/17/2012",
"6/18/2012", "6/19/2012", "6/2/2012", "6/20/2012", "6/3/2012",
"6/4/2012", "6/5/2012", "6/6/2012", "6/7/2012", "6/8/2012", "6/9/2012"
), class = "factor"), Obs = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 320L, NA, NA, NA, NA, NA, NA, NA, NA, 321L, 321L,
322L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 323L, NA, NA, NA, NA,
324L, NA, NA, NA, NA), Obs_filled = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 320L, 320L, 320L, NA, NA, NA, NA, NA, NA, 321L,
321L, 321L, 322L, 322L, NA, NA, NA, NA, NA, NA, NA, 323L, 323L,
323L, NA, NA, 324L, 324L, 324L, NA, NA, NA)), .Names = c("Date",
"Obs", "Obs_filled"), class = "data.frame", row.names = c(NA,
-42L))
非常感谢任何帮助。
答案 0 :(得分:2)
一个思路是使用 dplyr。我们创建两个新列:Obs1 是 Obs 滞后 1 位(lag)的结果,Obs2 是 Obs 前导 1 位(lead)的结果。然后使用 coalesce 将这三个 Obs 列"合并"在一起。最后,删除不再需要的列。
library(dplyr)
# Build two helper columns holding the previous-row (lag) and next-row (lead)
# values of Obs, let coalesce() pick the first non-NA among Obs, Obs1 and Obs2,
# then drop the helper columns again.
df %>%
  mutate(
    Obs1 = lag(Obs),
    Obs2 = lead(Obs)
  ) %>%
  mutate(Obs = coalesce(Obs, Obs1, Obs2)) %>%
  select(-Obs1, -Obs2)
#...
#18 5/18/2012 NA
#19 5/19/2012 NA
#20 5/20/2012 320
#21 5/21/2012 320
#22 5/22/2012 320
#23 5/23/2012 NA
#24 5/24/2012 NA
#25 5/25/2012 NA
#26 5/26/2012 NA
#27 5/27/2012 NA
#28 5/28/2012 NA
#29 5/29/2012 321
#30 5/30/2012 321
#31 5/31/2012 321
#32 6/1/2012 322
#33 6/2/2012 322
#34 6/3/2012 NA
#35 6/4/2012 NA
#...