我正在尝试将两个数据框合并在一起,但是需要近似数据的连接(按日期和键)。例如,在“ 2000-03-01”上,df2 $ X1和df2 $ X2具有需要加入df1的信息,但是df1中没有“ 2000-03-01”。 不创建新行,如何为每个键将df1上的相关df2 $ X1和X2数据近似到“ 2000- 02 -01”行上?
df1 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
df1$Key <- LETTERS[1:2]
df1$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-02-01", "2000-04-01", "2000-05-01",
"2000-05-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))
print(df1)
X1 X2 Key Date
1 4 2 A 2000-01-01
2 3 2 B 2000-02-01
3 2 4 A 2000-02-01
4 3 5 B 2000-04-01
5 1 3 A 2000-05-01
6 2 0 B 2000-05-01
7 1 1 A 2000-07-01
8 5 3 B 2000-08-01
9 2 0 A 2000-09-01
10 5 4 B 2000-10-01
df2 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
df2$Key <- c('A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B')
df2$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-03-01", "2000-04-01", "2000-05-01",
"2000-06-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))
print(df2)
X1 X2 Key Date
1 4 5 A 2000-01-01
2 3 2 B 2000-02-01
3 1 5 A 2000-03-01
4 1 0 B 2000-04-01
5 0 4 A 2000-05-01
6 5 5 B 2000-06-01
7 2 2 A 2000-07-01
8 1 0 B 2000-08-01
9 0 4 A 2000-09-01
10 3 2 B 2000-10-01
library(dplyr)
join <- left_join(df1, df2, by = c("Key", "Date")); join
X1.x X2.x Key Date X1.y X2.y
1 4 2 A 2000-01-01 4 5
2 3 2 B 2000-02-01 3 2
3 2 4 A 2000-02-01 NA NA
4 3 5 B 2000-04-01 1 0
5 1 3 A 2000-05-01 0 4
6 2 0 B 2000-05-01 NA NA
7 1 1 A 2000-07-01 2 2
8 5 3 B 2000-08-01 1 0
9 2 0 A 2000-09-01 0 4
10 5 4 B 2000-10-01 3 2
>
join <- left_join(df1, df2, by = c("Key", "Date")) %>%
filter(abs(difftime(day,day,units = "days"))) <= as.difftime(2, format = "%d", unit = "days")
library(data.table)
as.data.table(df1)[as.data.table(df2), on = "Date", roll = "nearest"]
X1 X2 Key Date i.X1 i.X2 i.Key
1: 4 2 A 2000-01-01 4 5 A
2: 3 2 B 2000-02-01 3 2 B
3: 2 4 A 2000-02-01 3 2 B
4: 2 4 A 2000-03-01 1 5 A
5: 3 5 B 2000-04-01 1 0 B
6: 1 3 A 2000-05-01 0 4 A
7: 2 0 B 2000-05-01 0 4 A
8: 1 1 A 2000-06-01 5 5 B
9: 1 1 A 2000-07-01 2 2 A
10: 5 3 B 2000-08-01 1 0 B
11: 2 0 A 2000-09-01 0 4 A
12: 5 4 B 2000-10-01 3 2 B
答案 0 :(得分:0)
怎么样?
df2$FakeDate <- df2$Date[ findInterval(df1$Date, df2$Date) ]
df2
# X1 X2 Key Date FakeDate
# 1 4 3 A 2000-01-01 2000-01-01
# 2 4 0 B 2000-02-01 2000-02-01
# 3 2 5 A 2000-03-01 2000-02-01
# 4 3 1 B 2000-04-01 2000-04-01
# 5 2 5 A 2000-05-01 2000-05-01
# 6 5 3 B 2000-06-01 2000-05-01
# 7 5 0 A 2000-07-01 2000-07-01
# 8 3 0 B 2000-08-01 2000-08-01
# 9 4 5 A 2000-09-01 2000-09-01
# 10 4 0 B 2000-10-01 2000-10-01
df2$Date <- NULL # o/w merge will complain about 'Date' in both
merge(df1, df2, by.x=c("Key","Date"), by.y=c("Key","FakeDate"))
# Key Date X1.x X2.x X1.y X2.y
# 1 A 2000-01-01 0 5 4 3
# 2 A 2000-02-01 4 0 2 5
# 3 A 2000-05-01 4 1 2 5
# 4 A 2000-07-01 2 0 5 0
# 5 A 2000-09-01 0 3 4 5
# 6 B 2000-02-01 0 4 4 0
# 7 B 2000-04-01 1 5 3 1
# 8 B 2000-05-01 5 2 5 3
# 9 B 2000-08-01 3 5 3 0
# 10 B 2000-10-01 4 2 4 0