这是我的数据的副本:
structure(list(date = structure(c(8596, 8631, 8659, 8687, 8733,
8743, 8796, 8806, 8853, 8880, 8908, 8932, 8971, 8999, 9027, 9069,
9097, 9111, 9160, 9188, 9212, 9230, 9279, 9309, 9328, 9363, 9391,
9434, 9449, 9482, 9519, 9541, 9580, 9610, 9643, 9672, 9708, 9736,
9764, 9799, 9827, 9850, 9890, 9920, 9947, 9975, 10007, 10038,
10072, 10100, 10122, 10163, 10191, 10213, 10254, 10282, 10310,
10345, 10354, 10385, 10418, 10469, 10497, 10528, 10556, 10570,
10612, 10641, 10668, 10710, 10742, 10759, 10802, 10830, 10858,
10893, 10914, 10947, 10984, 11010, 11038, 11066, 11096, 11135,
11164, 11193, 11229, 11257, 11285, 11313, 11346, 11374, 11411,
11435, 11467, 11502, 11514, 11565, 11592, 11621, 11649, 11677,
11718, 11746, 11776, 11797, 11838, 11867, 11894, 11923, 11951,
11979, 12021, 12035, 12077, 12105, 12133, 12160, 12189, 12231,
12259, 12273, 12315, 12356, 12385, 12399, 12441, 12472, 12497,
12538, 12553, 12591, 12630, 12658, 12686, 12714, 12742, 12770,
12804, 12832, 12860, 12903, 12917, 12938, 12986, 13015, 13056,
13085, 13116, 13139, 13169, 13204, 13232, 13260, 13288, 13301,
13357, 13385, 13414, 13442, 13470, 13498, 13533, 13561, 13603,
13631, 13658, 13694, 13722, 13750, 13778, 13805, 13846, 13862,
13896, 13925, 13967, 13995, 14009, 14050, 14078, 14121, 14149,
14177, 14205, 14233, 14268, 14296, 14323, 14352, 14380, 14449,
14474, 14506, 14548, 14575, 14590, 14618, 14661, 14688, 14729,
14758, 14761, 14821, 14849, 14877, 14905, 14933, 14961, 14995,
15024, 15038, 15093, 15121, 15135, 15185, 15212, 15241, 15269,
15297, 15325, 15360, 15387, 15430, 15458, 15485, 15513, 15542,
15583, 15611, 15639, 15667, 15696, 15731, 15745, 15786, 15815,
15842, 15917, 15945, 15966, 16001, 16030, 16076, 16129, 16143,
16184, 16276, 16303, 16343, 16374, 16400, 16417, 16455, 16482,
16525, 16553, 16585, 16612, 16646, 16678, 16706, 16729, 16752,
16777, 16819, 16860, 16891, 16916, 16925, 16976, 17002, 17042,
17072, 17100, 17120, 17141, 17178, 17224, 17245, 17261, 17304,
17330, 17373, 17401, 17459, 17488, 17512, 17548, 17581, 17598,
17631), tzone = "UTC", tclass = "Date", class = "Date"), AverageTemp = c(16.5027083333333,
17.325, 17.1888888888889, 15.8277777777778, 16.6583333333333,
17.3333333333333, 16.64375, 17.1133333333333, 17.895119047619,
18.5694444444444, 18.8222222222222, 17.4305555555556, 17.6555555555556,
17.025, 17.3222222222222, 17.2770833333333, 17.4805555555556,
16.9708333333333, 17.9666666666667, 17.1222222222222, 18.0166666666667,
17.25, 18.1875, 17.6577777777778, 16.6541666666667, 17.1083333333333,
16.4666666666667, 17.5972756410256, 17.2, 17.4444444444444, 16.95,
17.7, 17.9222222222222, 18.4875, 17.8229166666667, 16.9166666666667,
16.7083333333333, 17.1666666666667, 17.3111111111111, 18.2333333333333,
16.6277777777778, 17.5875, 17.3833333333333, 17.4638888888889,
17.725, 18.1388888888889, 17.7001111111111, 17.7222222222222,
17.2041666666667, 17.8255952380952, 17.1833333333333, 17.8103070175439,
17.8194444444444, 17.952, 18.158412414966, 18.4910714285714,
18.3488562091503, 19.1341830065359, 18.45, 18.9107142857143,
17.2275, 19.0828761904762, 18.1599701591512, 18.965739220457,
18.6720606060606, 18.8786057692308, 18.602656449553, 18.6327347883598,
19.2925198412698, 20.1952463624339, 18.8900384227765, 18.0934444444444,
18.0554871794872, 17.8405270655271, 17.5540598290598, 17.454122110648,
17.5764155982906, 16.9989942528736, 16.4252032967033, 16.5388571428571,
17.0108695652174, 17.7725308641975, 18.4252564102564, 17.2278899240856,
17.3102091315453, 17.3627204585538, 17.280641025641, 17.3746616809117,
17.3014601139601, 17.2238271604938, 16.379012345679, 16.6044444444444,
17.624415954416, 18.4023148148148, 18.0341435185185, 17.3016666666667,
17.8204861111111, 17.827264957265, 17.2772467320261, 17.8786954365079,
17.84375, 17.1732638888889, 16.9219907407407, 17.3826388888889,
17.7413333333333, 18.4948412698413, 18.2363425925926, 17.3282057823129,
17.5083333333333, 17.414898989899, 16.9453125, 17.4988095238095,
17.6704012345679, 18.1333333333333, 18.11875, 17.4805555555556,
17.4271367521368, 17.9006944444444, 17.9818181818182, 17.3125,
16.73625, 17.2666666666667, 17.4279340277778, 17.8584444444444,
17.2966666666667, 17.1, 18.3420833333333, 18.5814285714286, 17.6430555555556,
18.2307122507123, 18.0830687830688, 16.7563492063492, 16.9055555555556,
17.0090277777778, 17.3863095238095, 16.9139880952381, 16.7479166666667,
17.0888888888889, 17.7648148148148, 18.2277777777778, 19.3694444444444,
17.7064021164021, 18.7371527777778, 17.94375, 17.9416666666667,
17.8736111111111, 18.5354166666667, 18.1919444444444, 18.2555555555556,
17.7704365079365, 17.3509259259259, 17.3931216931217, 18.3355923202614,
17.9180555555556, 18.2104166666667, 18.0171121593291, 17.6840277777778,
17.5509259259259, 16.9631313131313, 17.4478070175439, 17.6916666666667,
17.6143376068376, 18.7415656565657, 19.0048611111111, 18.285462962963,
18.3816964285714, 18.2041310541311, 17.2343518518519, 17.2149382716049,
17.3684027777778, 17.5229861111111, 16.8517857142857, 19.0929141414141,
19.300404040404, 18.735, 17.9280277777778, 18.4470274170274,
19.0686597406425, 18.325, 18.5, 18.4388888888889, 18.7291666666667,
18.3708333333333, 18.0234918630752, 19.4925980392157, 19.2101488095238,
19.3890625, 18.5150793650794, 19.1944444444444, 19.0815277777778,
19.5192658730159, 17.2212418300654, 17.8081168831169, 18.2517361111111,
17.7775555555556, 18.012962962963, 17.0347222222222, 16.5888888888889,
18.8123101604278, 18.9187091503268, 19.0161111111111, 19.2625,
20.875, 18.8092592592593, 18.6526515151515, 18.9083333333333,
18.9835227272727, 18.1829292929293, 17.9060606060606, 17.7835227272727,
17.8237719298246, 19.7386363636364, 18.4961051693405, 18.5332727272727,
18.3787878787879, 18.5134199134199, 17.8098930481283, 18.4179292929293,
17.230303030303, 18.9035064935065, 17.8935897435897, 17.6211966604824,
17.9238095238095, 18.8382886904762, 19.42625, 18.6395833333333,
18.0652777777778, 19.3354166666667, 18.75359375, 17.951123043623,
17.6063068181818, 17.828022875817, 17.5528846153846, 18.5647727272727,
19.0318181818182, 19.1659090909091, 18.8997564935065, 19.1301136363636,
18.1705882352941, 17.1361570247934, 18.6090909090909, 18.1429951690821,
17.8829545454545, 18.3387983091787, 18.41875, 19.7, 20.2508333333333,
17.6387426900585, 18.1770897832817, 17.5400297619048, 17.7547246376812,
17.246412037037, 17.0846153846154, 17.7060185185185, 18.325,
18.5408333333333, 19.4251587301587, 18.3706018518519, 17.917,
17.91, 18.6451388888889, 18.29375, 17.2316666666667, 18.7189393939394,
18.1669193548387, 18.367297979798, 17.7043055555556, 18.1879520697168,
19.12, 20.425, 18.6663888888889, 17.5108796296296, 18.1883333333333,
18.3060049019608, 18.32625, 18.2861111111111, 18.0375, 17.3445175438596,
18.6451058201058, 18.97875, 19.4583333333333, 18.2597222222222,
19.9197222222222, 18.2342307692308, 18.7666666666667, 19.8277777777778,
17.6464285714286, 18.690873015873, 18.4520833333333, 19.8696428571429,
19.9833333333333, 18.2416666666667)), class = "data.frame", row.names = c(NA,
-292L))
我的数据日期采用 YYYY-MM-DD 格式,并且是月度数据。目前有几个月的数据缺失(例如 2017-09、2014-05、2014-06、2013-12),但数据框中并没有这些月份对应的行。如何为整个数据集中所有缺失的月份创建新行?我的数据集有两列,因此在新增的缺失月份行中,除日期列之外的另一列应取 NA 值。我希望得到基于 tidyverse、lubridate 或 data.table 的解决方案。
答案 0 :(得分:1)
您可以使用 tidyr::complete 来实现,但这里有一个额外的麻烦:各月的观测日期并不落在每月的同一天。因此首先需要创建一个只表示月份的列(把每个日期都归到当月的第一天),这可以用 lubridate 中的赋值函数 day(x) <- 来完成。
下面是一个示例,为简洁起见,所用数据被截取为仅含 2014 年。请注意,应使用 seq.Date 指定 month 列中需要包含的全部月份;补出的行在 date 列中也会是 NA(如果需要,可以把这些 NA 替换为当月的第一天)。
library(tidyverse)
library(lubridate)
# Example data: the ten 2014 observations from the question. Dates are stored
# as day counts since 1970-01-01 and carry the Date class.
tbl <- structure(
  list(
    date = structure(
      c(
        16076, 16129, 16143, 16184, 16276,
        16303, 16343, 16374, 16400, 16417
      ),
      tzone = "UTC",
      tclass = "Date",
      class = "Date"
    ),
    AverageTemp = c(
      18.3387983091787, 18.41875, 19.7, 20.2508333333333,
      17.6387426900585, 18.1770897832817, 17.5400297619048,
      17.7547246376812, 17.246412037037, 17.0846153846154
    )
  ),
  row.names = c(NA, -10L),
  class = "data.frame"
)
# Tag each observation with the first day of its month, then expand the
# month column to every calendar month between the earliest and the latest.
tbl %>%
  mutate(month = `day<-`(date, 1)) %>%
  complete(month = seq.Date(from = min(month), to = max(month), by = "month"))
#> # A tibble: 12 x 3
#> month date AverageTemp
#> <date> <date> <dbl>
#> 1 2014-01-01 2014-01-06 18.3
#> 2 2014-02-01 2014-02-28 18.4
#> 3 2014-03-01 2014-03-14 19.7
#> 4 2014-04-01 2014-04-24 20.3
#> 5 2014-05-01 NA NA
#> 6 2014-06-01 NA NA
#> 7 2014-07-01 2014-07-25 17.6
#> 8 2014-08-01 2014-08-21 18.2
#> 9 2014-09-01 2014-09-30 17.5
#> 10 2014-10-01 2014-10-31 17.8
#> 11 2014-11-01 2014-11-26 17.2
#> 12 2014-12-01 2014-12-13 17.1
作为替代方案,您也可以只提取年份和月份两个分量,然后对二者的组合使用 complete:
# Alternative: expand over every (year, month) pair, then trim the padding.
# Completing year x 1:12 also creates months before the first observation and
# after the last one whenever the series does not start in January or end in
# December, so only the months inside the observed span are kept.
tbl %>%
  mutate(year = year(date), month = month(date)) %>%
  complete(year = min(year):max(year), month = 1:12) %>%
  # Linear month counter; rows added by complete() have an NA date.
  mutate(month_index = year * 12 + month) %>%
  filter(
    month_index >= min(month_index[!is.na(date)]),
    month_index <= max(month_index[!is.na(date)])
  ) %>%
  select(-month_index)
#> # A tibble: 12 x 4
#> year month date AverageTemp
#> <dbl> <dbl> <date> <dbl>
#> 1 2014 1 2014-01-06 18.3
#> 2 2014 2 2014-02-28 18.4
#> 3 2014 3 2014-03-14 19.7
#> 4 2014 4 2014-04-24 20.3
#> 5 2014 5 NA NA
#> 6 2014 6 NA NA
#> 7 2014 7 2014-07-25 17.6
#> 8 2014 8 2014-08-21 18.2
#> 9 2014 9 2014-09-30 17.5
#> 10 2014 10 2014-10-31 17.8
#> 11 2014 11 2014-11-26 17.2
#> 12 2014 12 2014-12-13 17.1
由reprex package(v0.2.1)于2019-03-20创建