用日期填充缺失值

时间:2021-05-07 09:58:33

标签: r date

我有以下数据框

DirectorID CompanyID start_new    end_new DateStartRole DateEndRole
1           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
2           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
3           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
4           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
5           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
6           36      1505      <NA>       <NA>    2000-01-01  2000-12-31
7           36     17834      <NA>       <NA>    2001-01-01  2011-12-31
8           36     17834      <NA>       <NA>    2001-01-01  2011-12-31
9           36     17834      <NA>       <NA>    2001-01-01  2011-12-31
10          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
11          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
12          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
13          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
14          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
15          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
16          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
17          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
18          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
19          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
20          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
21          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
22          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
23          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
24          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
25          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
26          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
27          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
28          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
# Reproducible sample data in dput() form: a data.frame with 20 rows and six
# columns. DirectorID and CompanyID are numeric; start_new and end_new are
# Date columns that are entirely NA; DateStartRole and DateEndRole are Date
# columns stored as numeric day counts carrying class "Date".
# NOTE(review): this expression is not assigned to a name, while the pipeline
# further down reads a variable called `df` -- presumably the result is meant
# to be stored as `df`; confirm before running.
structure(list(DirectorID = c(31, 31, 31, 31, 31, 36, 36, 36, 
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36), CompanyID = c(1990357, 
1990357, 1990357, 1990357, 1990357, 1505, 17834, 17834, 17834, 
17834, 17834, 17834, 17834, 17834, 17834, 17834, 17834, 17834, 
17834, 17834), start_new = structure(c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), class = "Date"), end_new = structure(c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), 
    DateStartRole = structure(c(16436, 16436, 16436, 16436, 16436, 
    10957, 11323, 11323, 11323, 11323, 11323, 11323, 11323, 11323, 
    11323, 11323, 11323, 11323, 11323, 11323), class = "Date"), 
    DateEndRole = structure(c(18261, 18261, 18261, 18261, 18261, 
    11322, 15339, 15339, 15339, 15339, 15339, 15339, 15339, 15339, 
    15339, 15339, 15339, 15339, 15339, 15339), class = "Date")), row.names = c(NA, 
20L), class = "data.frame")

变量 DateStartRole 和 DateEndRole 表示整个期间的起止日期。我希望把 start_new 和 end_new 填充为逐年的日期区间，即每一行对应该期间内的一个 1 年区间。所需的输出如下所示：

DirectorID CompanyID          start_new    end_new      DateStartRole  DateEndRole
1           31   1990357      2015-01-01   2015-12-31    2015-01-01    2019-12-31
2           31   1990357      2016-01-01   2016-12-31    2015-01-01    2019-12-31
3           31   1990357      2017-01-01   2017-12-31    2015-01-01    2019-12-31
4           31   1990357      2018-01-01   2018-12-31    2015-01-01    2019-12-31
5           31   1990357      2019-01-01   2019-12-31    2015-01-01    2019-12-31

1 个答案:

答案 0 :(得分:0)

您可以按 DirectorID 分组，从每组的第一个 DateStartRole 开始，生成一个步长为 1 年、长度等于该组行数的日期序列：

library(dplyr)
library(lubridate)

# Fill start_new / end_new with consecutive one-year windows, director by
# director. The number of windows comes from the group's row count, not from
# DateEndRole, so windows can run past the end of the role period.
df %>%
  group_by(DirectorID) %>%
  # Window starts: begin at the group's first DateStartRole and step one year
  # at a time, producing exactly one date per row of the group.
  mutate(
    start_new = seq(first(DateStartRole), by = "1 year", length.out = n())
  ) %>%
  # Each window ends the day before the next one starts; the last row has no
  # successor, so its own start shifted forward by one year is used instead.
  mutate(
    end_new = lead(start_new, default = last(start_new) + years(1)) - 1
  ) %>%
  ungroup()

#   DirectorID CompanyID start_new  end_new    DateStartRole DateEndRole
#        <dbl>     <dbl> <date>     <date>     <date>        <date>     
# 1         31   1990357 2015-01-01 2015-12-31 2015-01-01    2019-12-31 
# 2         31   1990357 2016-01-01 2016-12-31 2015-01-01    2019-12-31 
# 3         31   1990357 2017-01-01 2017-12-31 2015-01-01    2019-12-31 
# 4         31   1990357 2018-01-01 2018-12-31 2015-01-01    2019-12-31 
# 5         31   1990357 2019-01-01 2019-12-31 2015-01-01    2019-12-31 
# 6         36      1505 2000-01-01 2000-12-31 2000-01-01    2000-12-31 
# 7         36     17834 2001-01-01 2001-12-31 2001-01-01    2011-12-31 
# 8         36     17834 2002-01-01 2002-12-31 2001-01-01    2011-12-31 
# 9         36     17834 2003-01-01 2003-12-31 2001-01-01    2011-12-31 
#10         36     17834 2004-01-01 2004-12-31 2001-01-01    2011-12-31 
#11         36     17834 2005-01-01 2005-12-31 2001-01-01    2011-12-31 
#12         36     17834 2006-01-01 2006-12-31 2001-01-01    2011-12-31 
#13         36     17834 2007-01-01 2007-12-31 2001-01-01    2011-12-31 
#14         36     17834 2008-01-01 2008-12-31 2001-01-01    2011-12-31 
#15         36     17834 2009-01-01 2009-12-31 2001-01-01    2011-12-31 
#16         36     17834 2010-01-01 2010-12-31 2001-01-01    2011-12-31 
#17         36     17834 2011-01-01 2011-12-31 2001-01-01    2011-12-31 
#18         36     17834 2012-01-01 2012-12-31 2001-01-01    2011-12-31 
#19         36     17834 2013-01-01 2013-12-31 2001-01-01    2011-12-31 
#20         36     17834 2014-01-01 2014-12-31 2001-01-01    2011-12-31