我有一个包含几家公司股票数据的 data.frame(这里只有两家)。对于我当前数据框中的这两家公司,我希望在股票数据的 data.frame `df` 中添加 10 个额外的列,其中包含滞后和超前的日期(从 -5 天到 +5 天)。我使用的 for 循环可能不是最好的解决方案,但它可以部分工作。
# Sample data: date strings, returns and firm labels for two firms
# ("A" and "B"), seven rows each.
DATE <- c(
  "01.01.2000", "02.01.2000", "03.01.2000", "06.01.2000",
  "07.01.2000", "09.01.2000", "10.01.2000",
  "01.01.2000", "02.01.2000", "04.01.2000", "06.01.2000",
  "07.01.2000", "09.01.2000", "10.01.2000"
)
RET <- c(
  -2.0, 1.1, 3, 1.4, -0.2, 0.6, 0.1,
  -0.21, -1.2, 0.9, 0.3, -0.1, 0.3, -0.12
)
COMP <- rep(c("A", "B"), each = 7)
# Keep DATE and COMP as character vectors. FALSE is spelled out because the
# shorthand `F` is an ordinary variable that can be reassigned.
df <- data.frame(DATE, RET, COMP, stringsAsFactors = FALSE)
df
# DATE RET COMP
# 1 01.01.2000 -2.00 A
# 2 02.01.2000 1.10 A
# 3 03.01.2000 3.00 A
# 4 06.01.2000 1.40 A
# 5 07.01.2000 -0.20 A
# 6 09.01.2000 0.60 A
# 7 10.01.2000 0.10 A
# 8 01.01.2000 -0.21 B
# 9 02.01.2000 -1.20 B
# 10 04.01.2000 0.90 B
# 11 06.01.2000 0.30 B
# 12 07.01.2000 -0.10 B
# 13 09.01.2000 0.30 B
# 14 10.01.2000 -0.12 B
下面这个循环可以正常工作:
# Distinct firm labels, in order of first appearance in df.
comp <- as.vector(unique(df$COMP))
# One slot per firm; filled inside the loop and row-bound afterwards.
mylist <- vector("list", length(comp))
# create lags in DATE
# The shifts are built firm by firm so that a lag never picks up a date that
# belongs to a different firm.
# seq_along() yields an empty sequence when comp is empty, whereas
# 1:length(comp) would iterate over c(1, 0).
# NOTE: the DATEm5 column needs at least 5 rows per firm; with fewer rows the
# NA padding alone is longer than the data and transform() fails.
for (i in seq_along(comp)) {
  print(i)
  comp_i <- comp[i]
  df_k <- df[df$COMP %in% comp_i, ] # all trading days of one firm
  df_k <- transform(
    df_k,
    DATEm1 = c(NA, head(DATE, -1)),
    DATEm2 = c(NA, NA, head(DATE, -2)),
    DATEm3 = c(NA, NA, NA, head(DATE, -3)),
    DATEm4 = c(NA, NA, NA, NA, head(DATE, -4)),
    DATEm5 = c(NA, NA, NA, NA, NA, head(DATE, -5)),
    DATEp1 = c(DATE[-1], NA)
  )
  # Left disabled: DATE[-k] removes only the k-th element, so for k >= 2 these
  # vectors would have more values than df_k has rows.
  #DATEp2 = c(DATE[-2], NA, NA),
  #DATEp3 = c(DATE[-3], NA, NA, NA),
  #DATEp4 = c(DATE[-4], NA, NA, NA, NA),
  #DATEp5 = c(DATE[-5], NA, NA, NA, NA, NA))
  mylist[[i]] <- df_k
}
# Stack the per-firm frames back into a single data.frame.
df1 <- do.call(rbind, mylist)
但是如果我加上 DATEp2、DATEp3、DATEp4、DATEp5 这几行,代码就不起作用了。有人能告诉我这里哪里做错了吗?下面是包含所有滞后日期的代码。
# create lags and leads in DATE
# Fix: the lead columns used DATE[-k], which removes only the k-th element.
# For k >= 2 that leaves n - 1 values plus k NAs, which no longer matches the
# n rows of df_k, so transform() failed. tail(DATE, -k) drops the first k
# elements instead, so padding with k NAs gives exactly n values.
# NOTE: a 5-step shift needs at least 5 rows per firm; with fewer rows the NA
# padding alone is longer than the data.
# seq_along() yields an empty sequence when comp is empty, whereas
# 1:length(comp) would iterate over c(1, 0).
for (i in seq_along(comp)) {
  print(i)
  comp_i <- comp[i]
  df_k <- df[df$COMP %in% comp_i, ] # all trading days of one firm
  df_k <- transform(
    df_k,
    # DATEm<k>: DATE of the row k positions earlier (NA for the first k rows)
    DATEm1 = c(rep(NA, 1), head(DATE, -1)),
    DATEm2 = c(rep(NA, 2), head(DATE, -2)),
    DATEm3 = c(rep(NA, 3), head(DATE, -3)),
    DATEm4 = c(rep(NA, 4), head(DATE, -4)),
    DATEm5 = c(rep(NA, 5), head(DATE, -5)),
    # DATEp<k>: DATE of the row k positions later (NA for the last k rows)
    DATEp1 = c(tail(DATE, -1), rep(NA, 1)),
    DATEp2 = c(tail(DATE, -2), rep(NA, 2)),
    DATEp3 = c(tail(DATE, -3), rep(NA, 3)),
    DATEp4 = c(tail(DATE, -4), rep(NA, 4)),
    DATEp5 = c(tail(DATE, -5), rep(NA, 5))
  )
  mylist[[i]] <- df_k
}
# Stack the per-firm frames back into a single data.frame.
df1 <- do.call(rbind, mylist)
答案 0 :(得分:2)
除了 @DavidArenburg 的评论(在我看来它已经回答了这个问题)之外,我想向您展示另一种为数据添加滞后和超前日期的方法:您可以使用 dplyr。
# library() stops with an error when dplyr is not installed, whereas
# require() only returns FALSE and lets the script fail later.
library(dplyr)
# Within each firm (COMP), add the DATE found 1-5 rows earlier (DATEm*) and
# 1-5 rows later (DATEp*); positions without such a row become NA.
# `%>%` replaces the deprecated `%.%` chaining operator.
df1 <- df %>%
  group_by(COMP) %>%
  mutate(
    DATEm1 = lag(DATE, 1),
    DATEm2 = lag(DATE, 2),
    DATEm3 = lag(DATE, 3),
    DATEm4 = lag(DATE, 4),
    DATEm5 = lag(DATE, 5),
    DATEp1 = lead(DATE, 1),
    DATEp2 = lead(DATE, 2),
    DATEp3 = lead(DATE, 3),
    DATEp4 = lead(DATE, 4),
    DATEp5 = lead(DATE, 5)
  ) %>%
  # Drop the grouping so later operations on df1 are not silently per-firm.
  ungroup()
答案 1 :(得分:1)
@beginneR 的回答很精彩。为了完整起见,这里给出 data.table 的解法。
# Load data.table package; library() errors immediately if it is missing.
library(data.table)
dt <- as.data.table(df)
# Make lag and lead variables
# shift() with a vector n returns one shifted copy of DATE per offset, padded
# with NA, so it also copes with firms that have fewer rows than the offset
# (the manual c(rep(NA, i), head(DATE, -i)) construction does not).
# `:=` adds the columns to dt by reference, separately for each firm (COMP).
offsets <- 1:5
dt[, (paste0("Datem", offsets)) := shift(DATE, n = offsets, type = "lag"),
   by = COMP]
dt[, (paste0("Datep", offsets)) := shift(DATE, n = offsets, type = "lead"),
   by = COMP]