近似日期合并R-滚动加入/日期差

时间:2018-07-13 15:43:32

标签: r

我正在尝试将两个数据框合并在一起,但是需要近似数据的连接(按日期和键)。例如,在“ 2000-03-01”上,df2 $ X1和df2 $ X2具有需要加入df1的信息,但是df1中没有“ 2000-03-01”。 不创建新行,如何为每个键将df1上的相关df2 $ X1和X2数据近似到“ 2000- 02 -01”行上?

df1 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
df1$Key <- LETTERS[1:2]
df1$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-02-01", "2000-04-01", "2000-05-01", 
                     "2000-05-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))

print(df1)

X1 X2 Key       Date
1   4  2   A 2000-01-01
2   3  2   B 2000-02-01
3   2  4   A 2000-02-01
4   3  5   B 2000-04-01
5   1  3   A 2000-05-01
6   2  0   B 2000-05-01
7   1  1   A 2000-07-01
8   5  3   B 2000-08-01
9   2  0   A 2000-09-01
10  5  4   B 2000-10-01

df2 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
df2$Key <- c('A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B')
df2$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-03-01", "2000-04-01", "2000-05-01", 
                     "2000-06-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))

print(df2)

X1 X2 Key       Date
1   4  5   A 2000-01-01
2   3  2   B 2000-02-01
3   1  5   A 2000-03-01
4   1  0   B 2000-04-01
5   0  4   A 2000-05-01
6   5  5   B 2000-06-01
7   2  2   A 2000-07-01
8   1  0   B 2000-08-01
9   0  4   A 2000-09-01
10  3  2   B 2000-10-01

library(dplyr)
join <- left_join(df1, df2, by = c("Key", "Date")); join

X1.x X2.x Key       Date X1.y X2.y
1     4    2   A 2000-01-01    4    5
2     3    2   B 2000-02-01    3    2
3     2    4   A 2000-02-01   NA   NA
4     3    5   B 2000-04-01    1    0
5     1    3   A 2000-05-01    0    4
6     2    0   B 2000-05-01   NA   NA
7     1    1   A 2000-07-01    2    2
8     5    3   B 2000-08-01    1    0
9     2    0   A 2000-09-01    0    4
10    5    4   B 2000-10-01    3    2
> 

join <- left_join(df1, df2, by = c("Key", "Date")) %>%
  filter(abs(difftime(day,day,units = "days"))) <= as.difftime(2, format = "%d", unit = "days")


library(data.table)

as.data.table(df1)[as.data.table(df2), on = "Date", roll = "nearest"]

X1 X2 Key       Date i.X1 i.X2 i.Key
1:  4  2   A 2000-01-01    4    5     A
2:  3  2   B 2000-02-01    3    2     B
3:  2  4   A 2000-02-01    3    2     B
4:  2  4   A 2000-03-01    1    5     A
5:  3  5   B 2000-04-01    1    0     B
6:  1  3   A 2000-05-01    0    4     A
7:  2  0   B 2000-05-01    0    4     A
8:  1  1   A 2000-06-01    5    5     B
9:  1  1   A 2000-07-01    2    2     A
10:  5  3   B 2000-08-01    1    0     B
11:  2  0   A 2000-09-01    0    4     A
12:  5  4   B 2000-10-01    3    2     B

1 个答案:

答案 0 :(得分:0)

怎么样?

df2$FakeDate <- df2$Date[ findInterval(df1$Date, df2$Date) ]
df2
#    X1 X2 Key       Date   FakeDate
# 1   4  3   A 2000-01-01 2000-01-01
# 2   4  0   B 2000-02-01 2000-02-01
# 3   2  5   A 2000-03-01 2000-02-01
# 4   3  1   B 2000-04-01 2000-04-01
# 5   2  5   A 2000-05-01 2000-05-01
# 6   5  3   B 2000-06-01 2000-05-01
# 7   5  0   A 2000-07-01 2000-07-01
# 8   3  0   B 2000-08-01 2000-08-01
# 9   4  5   A 2000-09-01 2000-09-01
# 10  4  0   B 2000-10-01 2000-10-01
df2$Date <- NULL # o/w merge will complain about 'Date' in both
merge(df1, df2, by.x=c("Key","Date"), by.y=c("Key","FakeDate"))
#    Key       Date X1.x X2.x X1.y X2.y
# 1    A 2000-01-01    0    5    4    3
# 2    A 2000-02-01    4    0    2    5
# 3    A 2000-05-01    4    1    2    5
# 4    A 2000-07-01    2    0    5    0
# 5    A 2000-09-01    0    3    4    5
# 6    B 2000-02-01    0    4    4    0
# 7    B 2000-04-01    1    5    3    1
# 8    B 2000-05-01    5    2    5    3
# 9    B 2000-08-01    3    5    3    0
# 10   B 2000-10-01    4    2    4    0