我正在尝试根据某个日期条件将这两个数据框连接在一起。
我在 d1 中有每月的价格/销售数据,想把它与 d2 连接起来。d2 是年度数据,而 d1 是月度数据。
我想将它们合并在一起:把 d2 中的 date2 与 d1 中 date1 的第一个可观察到的月份相连接。
也就是说,d1:date1 必须大于 d2:date2。我想按 d2 中的 ID 和 date2 进行连接,并匹配 d1 的 date1 中第一个可用的月份日期。
我希望这能引起人们的注意。
我喜欢用tidyverse做事,因为我觉得它很容易理解,但是过去我遇到了内存问题,因此data.table版本会很棒。
数据中有一些重复项,因此使用 ... %>% distinct(., .keep_all = TRUE) 也许可以过滤掉其中一些。
d1-时间序列每月数据
ID date1 prc
1 69331C10 2012-01-31 40.66
2 69331C10 2012-02-29 41.68
3 69331C10 2012-03-30 43.41
4 69331C10 2012-04-30 44.18
5 69331C10 2012-05-31 43.70
6 69331C10 2012-06-29 45.27
d2-年度数据
ID date2 var1
1 09179710 2012-02-24 1
2 74979E10 2012-02-23 1
3 98252610 2012-02-24 1
4 89190610 2012-02-28 1
5 83418210 2012-02-23 1
6 71694110 2012-07-16 1
# Monthly price data: three IDs, each with the same 24 monthly observation
# dates (2012-01-31 through 2013-12-31), 72 rows in total.
d1 <- data.frame(
  # Each ID fills one contiguous run of 24 rows.
  ID = rep(c("69331C10", "43258910", "89190610"), each = 24L),
  # Dates are given as day counts since 1970-01-01 and tagged with the
  # "Date" class; the same 24 values repeat once per ID.
  date1 = structure(
    rep(
      c(
        15370, 15399, 15429, 15460, 15491, 15520,
        15552, 15583, 15611, 15644, 15674, 15705,
        15736, 15764, 15792, 15825, 15856, 15884,
        15917, 15947, 15978, 16009, 16038, 16070
      ),
      times = 3L
    ),
    class = "Date"
  ),
  prc = c(
    # ID 69331C10
    40.6599998474121, 41.6800003051758, 43.4099998474121, 44.1800003051758,
    43.7000007629395, 45.2700004577637, 46.1599998474121, 43.4099998474121,
    42.6699981689453, 42.5200004577637, 40.9500007629395, 40.1800003051758,
    42.6399993896484, 42.6399993896484, 44.5299987792969, 48.439998626709,
    44.9099998474121, 45.7299995422363, 45.8899993896484, 41.3600006103516,
    40.9199981689453, 41.8499984741211, 40.3699989318848, 40.2799987792969,
    # ID 43258910
    19.1499996185303, 20.25, 21.5300006866455, 22.0400009155273,
    20.8999996185303, 28.9899997711182, 25.6100006103516, 26.0699996948242,
    26.7800006866455, 26.0100002288818, 27.8500003814697, 28.1399993896484,
    30.9799995422363, 32.4000015258789, 35.1500015258789, 35.9099998474121,
    34.6399993896484, 33.0800018310547, 35.2099990844727, 32.310001373291,
    30.7399997711182, 32.8300018310547, 33.4199981689453, 33.439998626709,
    # ID 89190610
    21.4400005340576, 21.8799991607666, 23.0699996948242, 23.5200004577637,
    23.2700004577637, 23.9300003051758, 23.6499996185303, 23.1800003051758,
    23.7000007629395, 22.4899997711182, 21.9500007629395, 21.4200000762939,
    23.25, 23.7600002288818, 24.7800006866455, 23.6200008392334,
    23.5100002288818, 24.4799995422363, 27.4099998474121, 27.6700000762939,
    29.4200000762939, 29.8299999237061, 31.0499992370605, 33.2799987792969
  ),
  # Keep ID as character on R versions older than 4.0 as well.
  stringsAsFactors = FALSE
)
# Second table to be joined against d1: one row per (ID, date2) record.
# ID "70455110" appears twice. date2 is kept as character strings in
# "YYYY-MM-DD" form (it is not a Date column, unlike d1$date1).
d2 <- data.frame(
  ID = c(
    "09179710", "74979E10", "98252610", "89190610", "83418210",
    "71694110", "70455110", "70455110", "69331C10", "43258910"
  ),
  date2 = c(
    "2012-02-24", "2012-02-23", "2012-02-24", "2012-02-28", "2012-02-23",
    "2012-07-16", "2012-02-01", "2012-02-29", "2012-02-16", "2012-02-23"
  ),
  var1 = c(
    1, 1, 1, 1, 1,
    1, 0.999999957207671, 0.999999957207671, 0.998543649717913,
    0.986842264036935
  ),
  # Keep ID and date2 as character on R versions older than 4.0 as well.
  stringsAsFactors = FALSE
)