我有两个数据框,如下所示:
> df_X
X1 X2 X3 X5
1 2017-05-02 21 1 NA
2 2017-05-02 10 1 NA
3 2017-05-02 5 1 NA
4 2017-05-02 5 1 NA
5 2017-05-02 25 1 NA
6 2017-05-02 20 1 NA
7 2017-05-04 25 1 NA
8 2017-05-08 25 1 NA
9 2017-05-10 20 1 NA
10 2017-05-13 30 1 NA
11 2017-05-17 5 1 NA
12 2017-05-20 25 1 NA
13 2017-05-23 15 1 NA
14 2017-05-25 20 1 NA
15 2017-05-27 35 1 NA
16 2017-05-30 30 1 NA
17 2017-06-01 10 1 NA
18 2017-06-03 10 1 NA
19 2017-06-05 5 1 NA
20 2017-06-08 10 1 NA
和
> df_Y
Y1 Y2 Y3 Y4
1 1 0.6656641 2017-08-01 2017-08-02
2 1 1.0285626 2017-08-17 2017-08-18
3 1 1.1905584 2017-08-21 2017-08-22
4 1 1.1026008 2017-08-24 2017-08-25
5 1 1.1570583 2017-08-28 2017-08-29
6 1 1.0676820 2017-08-31 2017-09-01
7 2 1.1003816 2017-08-31 2017-09-01
8 2 1.1901956 2017-09-04 2017-09-05
9 1 1.1636756 2017-09-05 2017-09-05
10 1 1.1724968 2017-09-07 2017-09-07
11 2 1.1335978 2017-09-08 2017-09-09
12 1 1.0873912 2017-09-08 2017-09-08
13 1 1.1238168 2017-09-11 2017-09-11
14 2 1.1564612 2017-09-12 2017-09-13
15 2 1.1688628 2017-09-14 2017-09-15
16 1 1.2326152 2017-09-14 2017-09-15
17 2 1.2014378 2017-09-16 2017-09-16
18 2 1.1911753 2017-09-19 2017-09-19
19 1 1.1445341 2017-09-19 2017-09-20
20 1 1.1299295 2017-09-21 2017-09-22
我想将 X5 的值替换为 Y2 的值,条件是 X1 介于 Y3 和 Y4 之间,并且 Y1 与 X3 匹配。
使用循环很容易实现:
完成整个数据帧的循环需要很长时间,所以我想知道是否有一个矢量化解决方案,可能使用
# For each row i of df_Y, write df_Y$Y2[i] into df_X$X5 for every df_X row
# whose X3 equals df_Y$Y1[i] and whose X1 lies in the closed interval
# [df_Y$Y3[i], df_Y$Y4[i]]. Later df_Y rows overwrite earlier ones when
# several intervals match the same df_X row.
#
# seq_len() is used instead of 1:NROW(df_Y): with zero rows, 1:0 would
# iterate over c(1, 0) and the i = 0 pass would fail with a zero-length
# replacement value.
# NOTE(review): the comparison of X1 with Y3/Y4 assumes both sides have
# compatible, order-preserving types (e.g. Date, or ISO "YYYY-MM-DD"
# strings) -- confirm against the real data.
for (i in seq_len(NROW(df_Y))) {
  in_window <- (df_X$X1 >= df_Y$Y3[i]) & (df_X$X1 <= df_Y$Y4[i])
  same_key <- df_X$X3 == df_Y$Y1[i]
  df_X$X5[in_window & same_key] <- df_Y$Y2[i]
}
> df_X_result
X1 X2 X3 X5
1 2017-05-02 21 1 NA
...
26 2017-08-13 11 1 NA
27 2017-08-18 10 1 1.0285626
28 2017-08-21 5 1 1.1905584
29 2017-08-23 5 1 NA
30 2017-08-25 20 1 1.1026008
31 2017-08-28 10 1 1.1570583
32 2017-08-30 3 1 NA
33 2017-08-31 10 1 1.0676820
34 2017-09-04 18 1 NA
35 2017-09-05 25 1 1.1636756
36 2017-09-07 30 1 1.1724968
37 2017-09-08 20 1 1.0873912
38 2017-09-11 20 1 1.1238168
39 2017-09-13 5 1 NA
40 2017-09-14 25 1 1.2326152
41 2017-09-15 30 1 1.2326152
group_by()?
# Reproducible definition of df_X (looks like dput() output): a data.frame
# with 135 rows and four columns.
#   X1 - dates stored as character strings in "YYYY-MM-DD" form
#   X2 - numeric values
#   X3 - numeric, takes the values 1 and 2
#   X5 - NA in every row; this is the column to be filled from df_Y$Y2
# NOTE(review): no matching definition of df_Y is visible in this text; only
# a 20-row printout of it appears above.
df_X <- structure(list(X1 = c("2017-05-02", "2017-05-02", "2017-05-02",
"2017-05-02", "2017-05-02", "2017-05-02", "2017-05-04", "2017-05-08",
"2017-05-10", "2017-05-13", "2017-05-17", "2017-05-20", "2017-05-23",
"2017-05-25", "2017-05-27", "2017-05-30", "2017-06-01", "2017-06-03",
"2017-06-05", "2017-06-08", "2017-06-10", "2017-06-14", "2017-06-16",
"2017-07-15", "2017-08-09", "2017-08-13", "2017-08-18", "2017-08-21",
"2017-08-23", "2017-08-25", "2017-08-28", "2017-08-30", "2017-08-31",
"2017-09-04", "2017-09-05", "2017-09-07", "2017-09-08", "2017-09-11",
"2017-09-13", "2017-09-14", "2017-09-15", "2017-09-18", "2017-09-19",
"2017-09-21", "2017-09-23", "2017-09-25", "2017-09-26", "2017-09-28",
"2017-09-29", "2017-09-30", "2017-10-02", "2017-10-03", "2017-10-04",
"2017-10-05", "2017-10-06", "2017-10-09", "2017-10-10", "2017-10-11",
"2017-10-12", "2017-10-12", "2017-10-14", "2017-10-16", "2017-10-18",
"2017-10-20", "2017-10-21", "2017-10-24", "2017-10-27", "2017-10-28",
"2017-10-31", "2017-11-02", "2017-11-03", "2017-11-06", "2017-11-08",
"2017-05-02", "2017-05-02", "2017-05-02", "2017-05-02", "2017-05-02",
"2017-05-03", "2017-05-05", "2017-05-09", "2017-05-12", "2017-05-16",
"2017-05-19", "2017-05-22", "2017-05-25", "2017-05-27", "2017-05-29",
"2017-05-31", "2017-06-02", "2017-06-05", "2017-06-07", "2017-06-14",
"2017-06-16", "2017-06-19", "2017-09-01", "2017-09-05", "2017-09-07",
"2017-09-08", "2017-09-11", "2017-09-12", "2017-09-14", "2017-09-15",
"2017-09-18", "2017-09-19", "2017-09-21", "2017-09-22", "2017-09-25",
"2017-09-27", "2017-09-29", "2017-09-30", "2017-10-02", "2017-10-03",
"2017-10-04", "2017-10-05", "2017-10-06", "2017-10-09", "2017-10-09",
"2017-10-11", "2017-10-11", "2017-10-12", "2017-10-14", "2017-10-16",
"2017-10-17", "2017-10-19", "2017-10-20", "2017-10-21", "2017-10-25",
"2017-10-27", "2017-10-31", "2017-11-01", "2017-11-02", "2017-11-03",
"2017-11-06", "2017-11-08"), X2 = c(21, 10, 5, 5, 25, 20, 25,
25, 20, 30, 5, 25, 15, 20, 35, 30, 10, 10, 5, 10, 5, 10, 15,
7, 23, 11, 10, 5, 5, 20, 10, 3, 10, 18, 25, 30, 20, 20, 5, 25,
30, 15, 10, 20, 29, 25, 30, 10, 10, 5, 5, 30, 15, 25, 25, 5,
30, 5, 10, 3, 20, 55, 10, 15, 30, 20, 30, 10, 20, 25, 25, 15,
20, 35, 20, 10, 15, 10, 15, 5, 5, 5, 25, 20, 5, 15, 30, 20, 20,
10, 20, 5, 20, 15, 5, 5, 5, 25, 25, 10, 10, 5, 5, 30, 25, 10,
5, 20, 10, 30, 20, 25, 10, 20, 15, 10, 5, 30, 5, 10, 30, 3, 10,
10, 40, 20, 24, 30, 40, 15, 10, 10, 5, 15, 30), X3 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2), X5 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("X1",
"X2", "X3", "X5"), row.names = c(NA, 135L), class = "data.frame")
答案 0 :(得分:1)
使用 data.table 的非等值连接(non-equi join)功能,这很容易实现:
library(data.table)
# Turn both data frames into data.tables by reference.
setDT(df_X)
setDT(df_Y)
# Align column types before joining: the date columns become IDate and the
# key/value columns become numeric (X5 starts out as NA in every row).
df_X[, `:=`(X1 = as.IDate(X1), X5 = as.numeric(X5))]
df_Y[, `:=`(Y1 = as.numeric(Y1), Y3 = as.IDate(Y3), Y4 = as.IDate(Y4))]
# Core step: non-equi update join. For each df_Y row, every df_X row with
# X3 == Y1 and Y3 <= X1 <= Y4 gets its X5 set to that row's Y2 (i.Y2 refers
# to the Y2 column of df_Y, the `i` table), updating df_X by reference.
df_X[df_Y, X5 := i.Y2, on = .(X3 = Y1, X1 >= Y3, X1 <= Y4)]
# Hand both objects back as plain data.frames.
setDF(df_X)
setDF(df_Y)