我想按日期合并两个 data.frame。`data` 包含某只股票每个交易日的收益率数据,`events` 包含有关该公司的新闻。有些新闻是在非交易日发布的,因此当天没有股票数据。例如,公司在 04.01.2000 发布了一条新闻,我想把这条新闻与下一个交易日的收益率合并,在本例中即 06.01.2000 的收益率。那么,怎样才能在合并时自动跳到下一个交易日呢?
# Daily returns for one stock; dates are kept as "dd.mm.yyyy" strings.
date1 <- c(
  "01.01.2000", "02.01.2000", "03.01.2000",
  "06.01.2000", "07.01.2000", "09.01.2000"
)
ret1 <- c(-2, 1.1, 3, 1.4, -0.2, 0.6)
data <- data.frame(date1 = date1, ret1 = ret1)
data
#        date1 ret1
# 1 01.01.2000 -2.0
# 2 02.01.2000  1.1
# 3 03.01.2000  3.0
# 4 06.01.2000  1.4
# 5 07.01.2000 -0.2
# 6 09.01.2000  0.6
# Company news items; some are dated on days with no row in the returns table.
date2 <- c("02.01.2000", "03.01.2000", "04.01.2000", "08.01.2000")
news2 <- paste0("blabla", 1:4)
events <- data.frame(date2 = date2, news2 = news2)
events
#        date2   news2
# 1 02.01.2000 blabla1
# 2 03.01.2000 blabla2
# 3 04.01.2000 blabla3
# 4 08.01.2000 blabla4
输出应如下所示:
# date news ret
# 1 02.01.2000 blabla1 1.1
# 2 03.01.2000 blabla2 3.0
# 3 06.01.2000 blabla3 1.4
# 4 09.01.2000 blabla4 0.6
谢谢!
答案 0 :(得分:5)
使用 data.table 包的滚动连接(rolling join)可以非常轻松地完成此操作:
library(data.table) # the original answer was written against 1.9.2

setDT(data)   # convert to data.table by reference
setDT(events)

# Parse the "dd.mm.yyyy" strings into Date before keying. Character keys are
# ordered lexicographically ("01.02.2000" sorts before "31.01.2000"), so a
# rolling join on them is only correct while all dates share the same month.
data[, date1 := as.Date(date1, format = "%d.%m.%Y")]
events[, date2 := as.Date(date2, format = "%d.%m.%Y")]

setkey(data, date1)   # column to join on; also sorts by 'date1'
setkey(events, date2) # likewise for 'date2'

# Keep a copy of the trading date: in the join result the 'date1' column
# carries the event date, so 'date' is what shows the matched trading day.
data[, date := date1]

# Rolling join with roll = -Inf: an event date with no exact match in 'data'
# takes the next later row (next observation carried backward). Events after
# the last trading day get NA in the 'data' columns.
data[events, roll = -Inf]
# Expected result (exact print layout varies by data.table version):
#         date1 ret1       date   news2
# 1: 2000-01-02  1.1 2000-01-02 blabla1
# 2: 2000-01-03  3.0 2000-01-03 blabla2
# 3: 2000-01-04  1.4 2000-01-06 blabla3
# 4: 2000-01-08  0.6 2000-01-09 blabla4
答案 1 :(得分:0)
对于不太愿意使用 data.table 的人来说,下面是一种比较冗长、不够优雅的 base R 方法:
# convert date1 and date2 to R Date format
# Attach each news item to the first trading day on or after its date, then
# merge with the returns. An event dated on a trading day keeps that day;
# several events falling between two trading days all map to the same next
# trading day; events after the last trading day are dropped.

# Parse the "dd.mm.yyyy" strings so that dates compare chronologically
data$date1 <- as.Date(data$date1, format = "%d.%m.%Y")
events$date2 <- as.Date(events$date2, format = "%d.%m.%Y")

trading.days <- sort(unique(data$date1)) # sort() also drops NA dates

# With left.open = TRUE, findInterval() counts the trading days strictly
# before each event date; adding 1 gives the index of the first trading day
# on or after it (one past the end when there is no such day).
next.idx <- findInterval(
  as.numeric(events$date2),
  as.numeric(trading.days),
  left.open = TRUE
) + 1L

events.new <- events # copy of events with dates moved to trading days
events.new$date2 <- trading.days[next.idx] # out-of-range index yields NA
events.new <- events.new[!is.na(events.new$date2), , drop = FALSE]
names(events.new)[names(events.new) == "date2"] <- "date1" # merge key name

events.final <- merge(events.new, data, by = "date1")
events.final # final dataset
#        date1   news2 ret1
# 1 2000-01-02 blabla1  1.1
# 2 2000-01-03 blabla2  3.0
# 3 2000-01-06 blabla3  1.4
# 4 2000-01-09 blabla4  0.6