如何将每次迭代的循环输出存储到数据框

时间:2017-10-17 18:39:48

标签: r function loops for-loop

好的,我有一个循环可以计算出股票价格系列的年化/累计回报。

我希望在许多文件上做同样的事情。所以做了一个循环来做到这一点。

首先是一些虚拟数据:

    # Create dummy data
    # Use lubridate to change timestamp to date format
    # Use dplyr to arrange by ascending order
    # Use fread from data.table to read .csv to data frame
    # library() stops right away when a package is missing, whereas require()
    # only warns and lets the script fail later with a confusing error.
    library(lubridate)
    library(data.table)
    library(dplyr)

    # Download the full daily price history for one ticker symbol and return it
    # with `timestamp` converted by ymd() and rows sorted by ascending timestamp.
    # NOTE(review): the request carries a hard-coded API key; set the
    # ALPHAVANTAGE_API_KEY environment variable to use your own key instead.
    fetch.daily <- function(symbol) {
      api.key <- Sys.getenv("ALPHAVANTAGE_API_KEY", unset = "6RSYX9BPXKZVXUS9")
      url <- paste0(
        "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=",
        symbol, "&outputsize=full&apikey=", api.key, "&datatype=csv"
      )
      prices <- fread(url)
      prices$timestamp <- ymd(prices$timestamp)
      arrange(prices, timestamp)
    }

    MSFT <- fetch.daily("MSFT")
    AAPL <- fetch.daily("AAPL")
    NFLX <- fetch.daily("NFLX")
    TSLA <- fetch.daily("TSLA")

    # Place data frames in a list
    df.list <- list(MSFT, AAPL, NFLX, TSLA)

    # Specify file names (same order as df.list)
    file.names <- c("MSFT", "AAPL", "NFLX", "TSLA")

现在数据已经准备好了。

接下来,我想计算每个系列的累计和年度回报。我把它放在一个函数中然后用循环调用函数:

    # Create function for performing commands.
    # Compute the cumulative and annualized return of one price series.
    #
    # Args:
    #   x:     data frame (or data.table) with a `close` price column and a
    #          `timestamp` column. The first row is treated as the start of
    #          the series and the last row as its end.
    #   name:  row label for the result. When NULL it falls back to
    #          file.names[i] from the calling session, which is what the
    #          original loop relied on.
    #   check: when TRUE, also compute the figures with TTR / xts /
    #          PerformanceAnalytics and report them through message().
    #
    # Returns:
    #   A one-row data frame with columns `cum.ret` and `ret.annual.return`,
    #   using `name` as the row name.
    genAnnualized <- function(x, name = NULL, check = FALSE) {
      # Work on the data frame that was passed in, not on df.list[[1]]
      new.df <- data.frame(x, stringsAsFactors = FALSE)
      if (is.null(name)) {
        name <- paste0(file.names[i])
      }

      prices <- new.df[["close"]]
      n <- length(prices)
      # At least two prices are needed to form a return
      stopifnot(n >= 2)

      # Close-to-close simple returns; the first row has no previous close,
      # so its return is set to 0 to keep the vector as long as the data
      close_ret <- c(0, diff(prices) / prices[-n])

      # Gross returns compounded into the running value of one unit invested
      close_fv <- cumprod(1 + close_ret)

      # Cumulative return between the first and last future value
      ret.first <- close_fv[1]
      ret.last <- close_fv[n]
      cum.ret <- (ret.last - ret.first) / ret.first

      # Elapsed time between first and last row, in years of 365 days
      ret.time <- new.df$timestamp[n] - new.df$timestamp[1]
      ret.trading.years.between <- as.numeric(ret.time, units = "days") / 365

      # Annualized return: (1 + cumulative return) ^ (1 / years) - 1
      ret.annual.return <- (1 + cum.ret)^(1 / ret.trading.years.between) - 1

      # One labelled row per series so the caller can rbind the results
      output.df <- data.frame(
        cum.ret = cum.ret,
        ret.annual.return = ret.annual.return,
        row.names = name
      )

      if (check) {
        # Sanity check against PerformanceAnalytics, using TTR for the returns
        for (pkg in c("TTR", "xts", "PerformanceAnalytics")) {
          if (!requireNamespace(pkg, quietly = TRUE)) {
            stop("Package '", pkg, "' is required when check = TRUE",
                 call. = FALSE)
          }
        }
        clret <- TTR::ROC(new.df$close, type = "discrete")
        clret[1] <- 0
        xts1 <- xts::xts(
          clret,
          order.by = as.Date(new.df$timestamp, format = "%m/%d/%Y")
        )
        message(
          name, ": PerformanceAnalytics annualized = ",
          as.numeric(PerformanceAnalytics::Return.annualized(xts1)),
          ", cumulative = ",
          as.numeric(PerformanceAnalytics::Return.cumulative(xts1, geometric = TRUE))
        )
      }

      output.df
    }

调用函数循环遍历数据框列表中的每个数据框:

# Run genAnnualized() on every data frame and keep each result.
# A plain for loop is used on purpose: `i` stays visible as a global, which
# genAnnualized() reads when it looks up file.names[i].
results <- vector("list", length(df.list))
for (i in seq_along(df.list)) {
  results[[i]] <- tryCatch(
    genAnnualized(df.list[[i]]),
    error = function(e) {
      # Report why the iteration failed instead of hiding the error
      print(paste("i =", i, "failed:", conditionMessage(e)))
      NULL
    }
  )
}

# Stack the per-series results; failed iterations (NULL) are skipped by rbind
annualized.returns <- do.call(rbind, results)

这应该是一个可重复的例子。

在每次迭代中,我希望将每个系列的累计回报和年化回报连同数据集的名称一起存储(以便稍后识别)。我正在尝试使用以下代码:

  # Bind the two return figures side by side and label the row with the name
  output.df <- cbind(cum.ret,ret.annual.return)
  rownames(output.df) <- next.name

我用以下内容指定名称:

# Names used to label the result of each data set
file.names <- c("MSFT","AAPL","NFLX","TSLA")

然后在调用它的函数中:

# Pick the name at the current loop index i
next.name <- paste0(file.names[i])

我希望粘贴文件名,以便在数据框中标记最终输出。

我认为在为行或列命名时,可能需要把名称重复两次,以便同时标记累计回报和年化回报。

我大致有了思路,但几周来一直没能解决,希望能得到一些帮助。

基本上,有了输出数据框之后,我就可以按四分位数等方式组织数据,以便进一步分析。

1 个答案:

答案 0 :(得分:1)

我认为最简单的解决方案是:对 file.names 中的每个文件计算出一个单行数据框,用 rbind 把它们逐行合并,并用对应的文件名为该行命名。为清晰起见,我删除了原有的注释(并添加了一些自己的注释)。

# Compute the annualized and cumulative return of one price series with
# TTR, xts and PerformanceAnalytics.
#
# Args:
#   df_list: a single data frame (despite the name) with a `close` column and
#            a `timestamp` column; the caller passes one element of df.list.
#
# Returns:
#   A one-row data frame with columns Annualized.Return and Cumulative.Return.
#   Row naming is left to the caller.
#
# The hand-rolled return calculation that used to sit here was removed: its
# result was never used, and its file.names[i] lookup made the function fail
# whenever it was called outside the loop.
genAnnualized <- function(df_list) {
  new.df <- data.frame(df_list, stringsAsFactors = FALSE)

  # Discrete close-to-close returns; the first one is undefined, so use 0
  new.df$clret <- TTR::ROC(new.df$close, type = c("discrete"))
  new.df$clret[1] <- 0
  xts1 <- xts::xts(new.df$clret, order.by = as.Date(new.df$timestamp, format = "%m/%d/%Y"))

  # Create the output of the function : a named data.frame
  data.frame("Annualized Return" = PerformanceAnalytics::Return.annualized(xts1),
             "Cumulative Return" = PerformanceAnalytics::Return.cumulative(xts1, geometric = TRUE))
}

# Collect one single-row data frame per input, then bind them all in one call
# instead of growing the result on every iteration.
# A plain for loop is kept so that `i` stays visible to genAnnualized().
results <- vector("list", length(df.list))

for (i in seq_along(df.list)) {
  temp <- genAnnualized(df.list[[i]])
  rownames(temp) <- file.names[i]
  results[[i]] <- temp
}

# Start from an empty data frame so an empty df.list still yields data.frame()
cum_ret <- do.call(rbind.data.frame, c(list(data.frame()), results))

这会得到一个数据框:其行数等于 df.list 中的文件数,每行以对应的文件名命名;共有 2 列,分别是年化回报和累计回报。

> cum_ret
     Annualized.Return Cumulative.Return
MSFT       -0.02279597        -0.3361359
AAPL        0.02039616         0.4314812
NFLX        0.17454862        10.8991045
TSLA        0.44666765        13.8233571