tibbletime - 前一天的收盘

时间:2018-03-18 14:16:46

标签: r

请耐心等待,因为这是我的第一个问题。我还在试图弄清楚如何发布数据和我已经拥有的代码,所以现在我只想解释一下。如果这不是提出问题的可接受方式,请忽略这个问题,下次我会尝试做正确的事。

我有一个数据框,我想做每日计算。在特定的一天,我已经拥有OpenUnits,BuyUnits,SellUnits,CloseUnits和Interest。这些值是由另一个系统计算的。我需要将每日利息比例与销售的单位数量进行比较。我可以进行计算,但我无法弄清楚如何获得OpenInterest(前一天的关闭),而不在数据框上使用for循环。 ClosingInterest应该是OpenInterest + Interest - SellUnits / OpenUnits * OpenInterest

我尝试使用mutate(OpenInterest = lag(ClosingInterest),ClosingInterest = OpenInterest + Interest - SellUnits / OpenUnits * OpenInterest),但这似乎不起作用。

我的代码使用for循环,但我希望可能有更好,更快的方法。

此致

library(tidyverse)
library(tibbletime)
library(lubridate)

sample <- list(OpenUnits = c(7500000, 7500000, 7500000, 7500000, 7500000, 
                             3300000, 3300000, 3300000, 3300000, 3300000), ClosingUnits = c(7500000, 
                                                                                            7500000, 7500000, 7500000, 3300000, 3300000, 3300000, 3300000, 
                                                                                            3300000, 3300000), AccrualDate = 16892:16901, AiaAdjustAmt = c(1844.70359677349, 
                                                                                                                                                           1845.18465061665, 1845.66582990696, 1846.14713467713, 812.516568582349, 
                                                                                                                                                           812.728453146696, 812.940392965385, 813.152388052826, 813.364438423431, 
                                                                                                                                                           813.576544091616), SellUnits = c(NA, NA, NA, NA, 4200000, NA, 
                                                                                                                                                                                            NA, NA, NA, NA))
sample <- sample %>%
  as_tibble() %>% 
  mutate(
    AccrualDate = lubridate::as_date(AccrualDate),
    SellUnits = if_else(is.na(SellUnits), 0, SellUnits)
  ) %>% 
  as_tbl_time(index = AccrualDate)

sample <- sample %>% 
  mutate(
    RealInterest = 0,
    OpenInterest = cumsum(AiaAdjustAmt) - cumsum(RealInterest) - AiaAdjustAmt - RealInterest,
    RealInterest = OpenInterest*SellUnits/OpenUnits 
  )

这不会产生正确答案。

# A time tibble: 10 x 7
# Index: AccrualDate
   OpenUnits ClosingUnits AccrualDate AiaAdjustAmt SellUnits s24j_real s24j_open
       <dbl>        <dbl> <date>             <dbl>     <dbl>     <dbl>     <dbl>
 1  7500000.     7500000. 2016-04-01         1845.        0.        0.        0.
 2  7500000.     7500000. 2016-04-02         1845.        0.        0.     1845.
 3  7500000.     7500000. 2016-04-03         1846.        0.        0.     3690.
 4  7500000.     7500000. 2016-04-04         1846.        0.        0.     5536.
 5  7500000.     3300000. 2016-04-05          813.  4200000.     4134.     7382.
 6  3300000.     3300000. 2016-04-06          813.        0.        0.     8194.
 7  3300000.     3300000. 2016-04-07          813.        0.        0.     9007.
 8  3300000.     3300000. 2016-04-08          813.        0.        0.     9820.
 9  3300000.     3300000. 2016-04-09          813.        0.        0.    10633.
10  3300000.     3300000. 2016-04-10          814.        0.        0.    11446.

正确答案应如下所示。这是我用for循环实现的,我试图避免,因为它在嵌套的较大数据集上感觉很慢。

# A time tibble: 10 x 7
# Index: AccrualDate
   OpenUnits ClosingUnits AccrualDate AiaAdjustAmt SellUnits s24j_real s24j_open
       <dbl>        <dbl> <date>             <dbl>     <dbl>     <dbl>     <dbl>
 1  7500000.     7500000. 2016-04-01         1845.        0.        0.        0.
 2  7500000.     7500000. 2016-04-02         1845.        0.        0.     1845.
 3  7500000.     7500000. 2016-04-03         1846.        0.        0.     3690.
 4  7500000.     7500000. 2016-04-04         1846.        0.        0.     5536.
 5  7500000.     3300000. 2016-04-05          813.  4200000.     4134.     7382.
 6  3300000.     3300000. 2016-04-06          813.        0.        0.     4060.
 7  3300000.     3300000. 2016-04-07          813.        0.        0.     4873.
 8  3300000.     3300000. 2016-04-08          813.        0.        0.     5686.
 9  3300000.     3300000. 2016-04-09          813.        0.        0.     6499.
10  3300000.     3300000. 2016-04-10          814.        0.        0.     7313.

生成正确答案的代码。

sample2 <- sample %>% 
  mutate(
    sell_ratio = if_else(!is.na(SellUnits), SellUnits/OpenUnits, 0),
    s24j_open = 0,
    s24j_close = 0,
    s24j_real = 0     
  )

open <- 0
close <- 0  

for (i in seq_along(sample2$AccrualDate)) {

  open <- close
  sellratio <- sample2[i, ]$sell_ratio
  int <- sample2[i, ]$AiaAdjustAmt
  real <- sellratio*open

  close <- open - real + int

  sample2[i, ]$s24j_open <- open
  sample2[i, ]$s24j_real <- real
  sample2[i, ]$s24j_close <- close
}

sample2 %>% 
  select(
    OpenUnits, ClosingUnits, AccrualDate, AiaAdjustAmt, SellUnits, s24j_real, s24j_open
  )

1 个答案:

答案 0 :(得分:0)

迟到总比没有好:

## defining the data frame
sample <- data.frame(OpenUnits = c(7500000, 7500000, 7500000, 7500000, 7500000, 
                               3300000, 3300000, 3300000, 3300000, 3300000), 
                 ClosingUnits = c(7500000, 7500000, 7500000, 7500000, 3300000, 3300000, 3300000, 3300000, 3300000, 3300000), 
                 AccrualDate = 16892:16901, 
                 AiaAdjustAmt = c(1844.70359677349, 1845.18465061665, 1845.66582990696, 1846.14713467713, 812.516568582349, 
                                  812.728453146696, 812.940392965385, 813.152388052826, 813.364438423431, 813.576544091616), 
                 SellUnits = c(NA, NA, NA, NA, 4200000, NA, NA, NA, NA, NA))

## defining a function to deliver the final output
## warning: the function is recursive
myfct <- function(n){
  ratio <- (sample$SellUnits/sample$OpenUnits)[n]
  ratio <- ifelse(is.na(ratio), 0, ratio)
  if(n > 1){
    vec <- myfct(n-1)
    val <- vec[length(vec)]
    newval <- sample$AiaAdjustAmt[n]+val*(1-ratio)
    newvec <- c(vec, newval)
    return(newvec)
  }
  if(n == 1){return(sample$AiaAdjustAmt[n]+0*(1-ratio))}
}

## finally applying the function and binding the output
## depending on dim(sample)[1], here one might have to add something like:
## options(expressions=10000) (to avoid "Error: evaluation nested too deeply...")
close <- myfct(dim(sample)[1])
open <- c(0, close[1:(length(close)-1)])
real <- open*ifelse(is.na(sample$SellUnits/sample$OpenUnits),
                    0,
                    sample$SellUnits/sample$OpenUnits)
output <- cbind.data.frame(sample, real, open)

结果如下:

> output
   OpenUnits ClosingUnits AccrualDate AiaAdjustAmt SellUnits     real     open
1    7500000      7500000       16892    1844.7036        NA    0.000    0.000
2    7500000      7500000       16893    1845.1847        NA    0.000 1844.704
3    7500000      7500000       16894    1845.6658        NA    0.000 3689.888
4    7500000      7500000       16895    1846.1471        NA    0.000 5535.554
5    7500000      3300000       16896     812.5166   4200000 4133.753 7381.701
6    3300000      3300000       16897     812.7285        NA    0.000 4060.465
7    3300000      3300000       16898     812.9404        NA    0.000 4873.194
8    3300000      3300000       16899     813.1524        NA    0.000 5686.134
9    3300000      3300000       16900     813.3644        NA    0.000 6499.286
10   3300000      3300000       16901     813.5765        NA    0.000 7312.651

然而,递归函数不会提高性能,而相反see this post)。虽然在上面的代码中myfct仅用于计算closerealopen是从中派生的)。 无论如何,我相信代码可以被修改以便失去递归和/或sapply - 我会尝试并更新代码。

修改

我的代码的第一个版本的问题是使用递归函数和sapply使得该过程非常冗长。实际上,myfct(n)已经为所有myfct(k)计算了k<n,因此使用sapply计算这些值是多余且低效的。

在此,我保留旧的functionsapply以获得完整性(已在上面的代码中编辑过新的function):

myfct.old <- function(n){
  ratio <- (sample$SellUnits/sample$OpenUnits)[n]
  ratio <- ifelse(is.na(ratio), 0, ratio)
  if(n > 1){return(sample$AiaAdjustAmt[n]+myfct.old(n-1)*(1-ratio))}
  if(n == 1){return(sample$AiaAdjustAmt[n]+0*(1-ratio))}
}

## finally applying the function and binding the output
close <- sapply(1:dim(sample)[1], myfct)

最后,这是不同方法的性能比较:

## Increasing the size of the data frame

sample <- do.call("rbind", replicate(100, sample, simplify = FALSE))

## (1) New method
start.time <- Sys.time()
close <- myfct(dim(sample)[1])
open <- c(0, close[1:(length(close)-1)])
real <- open*ifelse(is.na(sample$SellUnits/sample$OpenUnits),
                0,
                sample$SellUnits/sample$OpenUnits)
output <- cbind.data.frame(sample, real, open)
end.time <- Sys.time()
time.taken1 <- end.time - start.time
time.taken1
"Time difference of 0.19402 secs"

## (2) A loop
start.time <- Sys.time()
close <- 0
s24j_open <- c()
s24j_real <- c()
s24j_close <- c()
for(k in 1:dim(sample)[1]){
  open <- close
  ratio <- (sample$SellUnits/sample$OpenUnits)[k]
  ratio <- ifelse(is.na(ratio), 0, ratio)
  real <- open*ratio
  close <- sample$AiaAdjustAmt[k]+open-real

  s24j_open <- c(s24j_open, open)
  s24j_real <- c(s24j_real, real)
  s24j_close <- c(s24j_close, close)
}
output <- cbind.data.frame(sample, s24j_real, s24j_open)
end.time <- Sys.time()
time.taken2 <- end.time - start.time
time.taken2
"Time difference of 0.3530352 secs"

## (3) Old method
start.time <- Sys.time()
close <- sapply(1:dim(sample)[1], myfct.old)
open <- c(0, close[1:(length(close)-1)])
real <- open*ifelse(is.na(sample$SellUnits/sample$OpenUnits),
                    0,
                    sample$SellUnits/sample$OpenUnits)
output <- cbind.data.frame(sample, real, open)
end.time <- Sys.time()
time.taken3 <- end.time - start.time
time.taken3
"Time difference of 48.86089 secs"