在R中使用for循环创建滞后

时间:2014-06-01 07:47:45

标签: r date for-loop dataframe

我有一个包含几家公司(这里只有两家)股票数据的 data.frame。对于我的事件数据框中的这两家公司,我希望在股票 data.frame df 中添加 10 个额外的列,用来存放滞后日期(从 -5 天到 +5 天)。我使用的 for 循环可能不是最好的解决方案,但它能部分工作。

# Example input: daily returns for two firms (A and B), seven rows each.
# Dates are kept as "dd.mm.yyyy" character strings; the two firms do not
# share exactly the same set of dates (A has 03.01.2000, B has 04.01.2000).
DATE <- c(
  "01.01.2000", "02.01.2000", "03.01.2000", "06.01.2000", "07.01.2000",
  "09.01.2000", "10.01.2000",
  "01.01.2000", "02.01.2000", "04.01.2000", "06.01.2000", "07.01.2000",
  "09.01.2000", "10.01.2000"
)
RET <- c(
  -2.0, 1.1, 3, 1.4, -0.2, 0.6, 0.1,
  -0.21, -1.2, 0.9, 0.3, -0.1, 0.3, -0.12
)
COMP <- c(
  "A", "A", "A", "A", "A", "A", "A",
  "B", "B", "B", "B", "B", "B", "B"
)
# Spell out FALSE: the shorthand `F` is an ordinary variable that can be
# reassigned, which would silently turn the strings into factors.
df <- data.frame(DATE, RET, COMP, stringsAsFactors = FALSE)

df

# DATE   RET COMP
# 1  01.01.2000 -2.00    A
# 2  02.01.2000  1.10    A
# 3  03.01.2000  3.00    A
# 4  06.01.2000  1.40    A
# 5  07.01.2000 -0.20    A
# 6  09.01.2000  0.60    A
# 7  10.01.2000  0.10    A
# 8  01.01.2000 -0.21    B
# 9  02.01.2000 -1.20    B
# 10 04.01.2000  0.90    B
# 11 06.01.2000  0.30    B
# 12 07.01.2000 -0.10    B
# 13 09.01.2000  0.30    B
# 14 10.01.2000 -0.12    B

这个循环工作正常

# Distinct firm identifiers, in order of first appearance in df$COMP.
comp <- as.vector(unique(df$COMP))
# One preallocated slot per firm for its augmented data frame.
mylist <- vector("list", length(comp))

# create lags in DATE
# seq_along() iterates zero times when `comp` is empty, whereas
# 1:length(comp) would run over c(1, 0).
for (i in seq_along(comp)) {
  print(i)
  comp_i <- comp[i]
  df_k <- df[df$COMP %in% comp_i, ] # all trading days of one firm

  # DATEmK is the DATE found K rows earlier for the same firm and DATEp1 the
  # DATE one row later; rows without such a neighbour get NA.
  # NOTE: the fixed NA padding requires at least 5 rows per firm, otherwise
  # the padded vector is longer than the group and transform() errors.
  df_k <- transform(
    df_k,
    DATEm1 = c(NA, head(DATE, -1)),
    DATEm2 = c(NA, NA, head(DATE, -2)),
    DATEm3 = c(NA, NA, NA, head(DATE, -3)),
    DATEm4 = c(NA, NA, NA, NA, head(DATE, -4)),
    DATEm5 = c(NA, NA, NA, NA, NA, head(DATE, -5)),
    DATEp1 = c(DATE[-1], NA)
  )
  # Disabled lead columns, kept for reference. DATE[-k] drops only the k-th
  # element, so for k >= 2 these vectors would not match the row count.
  # DATEp2 = c(DATE[-2], NA, NA),
  # DATEp3 = c(DATE[-3], NA, NA, NA),
  # DATEp4 = c(DATE[-4], NA, NA, NA, NA),
  # DATEp5 = c(DATE[-5], NA, NA, NA, NA, NA))

  mylist[[i]] <- df_k
}

# Stack the per-firm results back into a single data frame.
df1 <- do.call(rbind, mylist)

但是,如果我再加上 DATEp2、DATEp3、DATEp4、DATEp5 这几行,代码就无法运行了。有人能告诉我哪里做错了吗?下面是包含所有滞后日期的代码。

# create lags and leads in DATE
# Relies on `df`, `comp` and `mylist` defined earlier in the script.
# seq_along() iterates zero times when `comp` is empty, whereas
# 1:length(comp) would run over c(1, 0).
for (i in seq_along(comp)) {
  print(i)
  comp_i <- comp[i]
  df_k <- df[df$COMP %in% comp_i, ] # all trading days of one firm

  # Lags (DATEmK): drop the last K dates with head(DATE, -K) and pad K NAs
  # in front, so each row sees the DATE found K rows earlier.
  # Leads (DATEpK): drop the FIRST K dates with tail(DATE, -K) and pad K NAs
  # at the end. DATE[-K] must not be used here: it removes only the K-th
  # element, so c(DATE[-2], NA, NA) has n + 1 entries and transform() fails
  # with a row-count mismatch.
  # NOTE: the fixed NA padding requires at least 5 rows per firm, otherwise
  # the padded vector is longer than the group and transform() errors.
  df_k <- transform(
    df_k,
    DATEm1 = c(NA, head(DATE, -1)),
    DATEm2 = c(NA, NA, head(DATE, -2)),
    DATEm3 = c(NA, NA, NA, head(DATE, -3)),
    DATEm4 = c(NA, NA, NA, NA, head(DATE, -4)),
    DATEm5 = c(NA, NA, NA, NA, NA, head(DATE, -5)),
    DATEp1 = c(tail(DATE, -1), NA),
    DATEp2 = c(tail(DATE, -2), NA, NA),
    DATEp3 = c(tail(DATE, -3), NA, NA, NA),
    DATEp4 = c(tail(DATE, -4), NA, NA, NA, NA),
    DATEp5 = c(tail(DATE, -5), NA, NA, NA, NA, NA)
  )

  mylist[[i]] <- df_k
}

# Stack the per-firm results back into a single data frame.
df1 <- do.call(rbind, mylist)

2 个答案:

答案 0 :(得分:2)

除了 @DavidArenburg 的评论(在我看来那已经是答案了)之外,我还想向您展示另一种为数据添加滞后和超前日期的方法。您可以使用 dplyr:

# Attach dplyr. library() stops with an error when the package is missing,
# whereas require() only returns FALSE and lets the script fail later.
library(dplyr)

# For each firm (COMP), add the DATE found 1-5 rows earlier (DATEm1..DATEm5)
# and 1-5 rows later (DATEp1..DATEp5); positions without such a row are NA.
# `%>%` replaces the old `%.%` chain operator, which dplyr deprecated in
# version 0.2. The result stays grouped by COMP, as in the original chain.
df1 <- df %>%
  group_by(COMP) %>%
  mutate(
    DATEm1 = lag(DATE, 1),
    DATEm2 = lag(DATE, 2),
    DATEm3 = lag(DATE, 3),
    DATEm4 = lag(DATE, 4),
    DATEm5 = lag(DATE, 5),
    DATEp1 = lead(DATE, 1),
    DATEp2 = lead(DATE, 2),
    DATEp3 = lead(DATE, 3),
    DATEp4 = lead(DATE, 4),
    DATEp5 = lead(DATE, 5)
  )

答案 1 :(得分:1)

@beginneR 的回答很精彩。为了完整起见,这里再给出一个 data.table 的解法。

# Load data.table package
# library() stops with an error when the package is missing, whereas
# require() only returns FALSE.
library(data.table)
dt <- as.data.table(df)

# Make lag and lead variables
# shift() with a vector `n` returns one shifted copy of DATE per offset and
# pads with NA itself, so it also works for firms with fewer than 5 rows
# (manual c(rep(NA, i), head(DATE, -i)) padding would be too long there).
# Datem1..Datem5 hold the DATE 1-5 rows earlier within the same COMP,
# Datep1..Datep5 the DATE 1-5 rows later.
dt[, c(paste0("Datem", 1:5)) := shift(DATE, n = 1:5, type = "lag"), by = COMP]
dt[, c(paste0("Datep", 1:5)) := shift(DATE, n = 1:5, type = "lead"), by = COMP]