请耐心等待,因为这是我的第一个问题。我还在试图弄清楚如何发布数据和我已经拥有的代码,所以现在我只想解释一下。如果这不是提出问题的可接受方式,请忽略这个问题,下次我会尝试做正确的事。
我有一个数据框,我想做每日计算。在特定的一天,我已经拥有OpenUnits,BuyUnits,SellUnits,CloseUnits和Interest。这些值是由另一个系统计算的。我需要将每日利息比例与销售的单位数量进行比较。我可以进行计算,但我无法弄清楚如何获得OpenInterest(前一天的关闭),而不在数据框上使用for循环。 ClosingInterest应该是OpenInterest + Interest - SellUnits / OpenUnits * OpenInterest
我尝试使用mutate(OpenInterest = lag(ClosingInterest),ClosingInterest = OpenInterest + Interest - SellUnits / OpenUnits * OpenInterest),但这似乎不起作用。
我的代码使用for循环,但我希望可能有更好,更快的方法。
此致
library(tidyverse)
library(tibbletime)
library(lubridate)
sample <- list(OpenUnits = c(7500000, 7500000, 7500000, 7500000, 7500000,
3300000, 3300000, 3300000, 3300000, 3300000), ClosingUnits = c(7500000,
7500000, 7500000, 7500000, 3300000, 3300000, 3300000, 3300000,
3300000, 3300000), AccrualDate = 16892:16901, AiaAdjustAmt = c(1844.70359677349,
1845.18465061665, 1845.66582990696, 1846.14713467713, 812.516568582349,
812.728453146696, 812.940392965385, 813.152388052826, 813.364438423431,
813.576544091616), SellUnits = c(NA, NA, NA, NA, 4200000, NA,
NA, NA, NA, NA))
sample <- sample %>%
as_tibble() %>%
mutate(
AccrualDate = lubridate::as_date(AccrualDate),
SellUnits = if_else(is.na(SellUnits), 0, SellUnits)
) %>%
as_tbl_time(index = AccrualDate)
sample <- sample %>%
mutate(
RealInterest = 0,
OpenInterest = cumsum(AiaAdjustAmt) - cumsum(RealInterest) - AiaAdjustAmt - RealInterest,
RealInterest = OpenInterest*SellUnits/OpenUnits
)
这不会产生正确答案。
# A time tibble: 10 x 7
# Index: AccrualDate
OpenUnits ClosingUnits AccrualDate AiaAdjustAmt SellUnits s24j_real s24j_open
<dbl> <dbl> <date> <dbl> <dbl> <dbl> <dbl>
1 7500000. 7500000. 2016-04-01 1845. 0. 0. 0.
2 7500000. 7500000. 2016-04-02 1845. 0. 0. 1845.
3 7500000. 7500000. 2016-04-03 1846. 0. 0. 3690.
4 7500000. 7500000. 2016-04-04 1846. 0. 0. 5536.
5 7500000. 3300000. 2016-04-05 813. 4200000. 4134. 7382.
6 3300000. 3300000. 2016-04-06 813. 0. 0. 8194.
7 3300000. 3300000. 2016-04-07 813. 0. 0. 9007.
8 3300000. 3300000. 2016-04-08 813. 0. 0. 9820.
9 3300000. 3300000. 2016-04-09 813. 0. 0. 10633.
10 3300000. 3300000. 2016-04-10 814. 0. 0. 11446.
正确答案应如下所示。这是我用for循环实现的,我试图避免,因为它在嵌套的较大数据集上感觉很慢。
# A time tibble: 10 x 7
# Index: AccrualDate
OpenUnits ClosingUnits AccrualDate AiaAdjustAmt SellUnits s24j_real s24j_open
<dbl> <dbl> <date> <dbl> <dbl> <dbl> <dbl>
1 7500000. 7500000. 2016-04-01 1845. 0. 0. 0.
2 7500000. 7500000. 2016-04-02 1845. 0. 0. 1845.
3 7500000. 7500000. 2016-04-03 1846. 0. 0. 3690.
4 7500000. 7500000. 2016-04-04 1846. 0. 0. 5536.
5 7500000. 3300000. 2016-04-05 813. 4200000. 4134. 7382.
6 3300000. 3300000. 2016-04-06 813. 0. 0. 4060.
7 3300000. 3300000. 2016-04-07 813. 0. 0. 4873.
8 3300000. 3300000. 2016-04-08 813. 0. 0. 5686.
9 3300000. 3300000. 2016-04-09 813. 0. 0. 6499.
10 3300000. 3300000. 2016-04-10 814. 0. 0. 7313.
生成正确答案的代码。
sample2 <- sample %>%
mutate(
sell_ratio = if_else(!is.na(SellUnits), SellUnits/OpenUnits, 0),
s24j_open = 0,
s24j_close = 0,
s24j_real = 0
)
open <- 0
close <- 0
for (i in seq_along(sample2$AccrualDate)) {
open <- close
sellratio <- sample2[i, ]$sell_ratio
int <- sample2[i, ]$AiaAdjustAmt
real <- sellratio*open
close <- open - real + int
sample2[i, ]$s24j_open <- open
sample2[i, ]$s24j_real <- real
sample2[i, ]$s24j_close <- close
}
sample2 %>%
select(
OpenUnits, ClosingUnits, AccrualDate, AiaAdjustAmt, SellUnits, s24j_real, s24j_open
)
答案 0 :(得分:0)
迟到总比没有好:
## defining the data frame
sample <- data.frame(OpenUnits = c(7500000, 7500000, 7500000, 7500000, 7500000,
3300000, 3300000, 3300000, 3300000, 3300000),
ClosingUnits = c(7500000, 7500000, 7500000, 7500000, 3300000, 3300000, 3300000, 3300000, 3300000, 3300000),
AccrualDate = 16892:16901,
AiaAdjustAmt = c(1844.70359677349, 1845.18465061665, 1845.66582990696, 1846.14713467713, 812.516568582349,
812.728453146696, 812.940392965385, 813.152388052826, 813.364438423431, 813.576544091616),
SellUnits = c(NA, NA, NA, NA, 4200000, NA, NA, NA, NA, NA))
## defining a function to deliver the final output
## warning: the function is recursive
myfct <- function(n){
ratio <- (sample$SellUnits/sample$OpenUnits)[n]
ratio <- ifelse(is.na(ratio), 0, ratio)
if(n > 1){
vec <- myfct(n-1)
val <- vec[length(vec)]
newval <- sample$AiaAdjustAmt[n]+val*(1-ratio)
newvec <- c(vec, newval)
return(newvec)
}
if(n == 1){return(sample$AiaAdjustAmt[n]+0*(1-ratio))}
}
## finally applying the function and binding the output
## depending on dim(sample)[1], here one might have to add something like:
## options(expressions=10000) (to avoid "Error: evaluation nested too deeply...")
close <- myfct(dim(sample)[1])
open <- c(0, close[1:(length(close)-1)])
real <- open*ifelse(is.na(sample$SellUnits/sample$OpenUnits),
0,
sample$SellUnits/sample$OpenUnits)
output <- cbind.data.frame(sample, real, open)
结果如下:
> output
OpenUnits ClosingUnits AccrualDate AiaAdjustAmt SellUnits real open
1 7500000 7500000 16892 1844.7036 NA 0.000 0.000
2 7500000 7500000 16893 1845.1847 NA 0.000 1844.704
3 7500000 7500000 16894 1845.6658 NA 0.000 3689.888
4 7500000 7500000 16895 1846.1471 NA 0.000 5535.554
5 7500000 3300000 16896 812.5166 4200000 4133.753 7381.701
6 3300000 3300000 16897 812.7285 NA 0.000 4060.465
7 3300000 3300000 16898 812.9404 NA 0.000 4873.194
8 3300000 3300000 16899 813.1524 NA 0.000 5686.134
9 3300000 3300000 16900 813.3644 NA 0.000 6499.286
10 3300000 3300000 16901 813.5765 NA 0.000 7312.651
然而,递归函数不会提高性能,而相反(see this post)。虽然在上面的代码中myfct
仅用于计算close
(real
,open
是从中派生的)。 无论如何,我相信代码可以被修改以便失去递归和/或 sapply
- 我会尝试并更新代码。
修改强>
我的代码的第一个版本的问题是使用递归函数和sapply使得该过程非常冗长。实际上,myfct(n)
已经为所有myfct(k)
计算了k<n
,因此使用sapply
计算这些值是多余且低效的。
在此,我保留旧的function
和sapply
以获得完整性(已在上面的代码中编辑过新的function
):
myfct.old <- function(n){
ratio <- (sample$SellUnits/sample$OpenUnits)[n]
ratio <- ifelse(is.na(ratio), 0, ratio)
if(n > 1){return(sample$AiaAdjustAmt[n]+myfct.old(n-1)*(1-ratio))}
if(n == 1){return(sample$AiaAdjustAmt[n]+0*(1-ratio))}
}
## finally applying the function and binding the output
close <- sapply(1:dim(sample)[1], myfct)
最后,这是不同方法的性能比较:
## Increasing the size of the data frame
sample <- do.call("rbind", replicate(100, sample, simplify = FALSE))
## (1) New method
start.time <- Sys.time()
close <- myfct(dim(sample)[1])
open <- c(0, close[1:(length(close)-1)])
real <- open*ifelse(is.na(sample$SellUnits/sample$OpenUnits),
0,
sample$SellUnits/sample$OpenUnits)
output <- cbind.data.frame(sample, real, open)
end.time <- Sys.time()
time.taken1 <- end.time - start.time
time.taken1
"Time difference of 0.19402 secs"
## (2) A loop
start.time <- Sys.time()
close <- 0
s24j_open <- c()
s24j_real <- c()
s24j_close <- c()
for(k in 1:dim(sample)[1]){
open <- close
ratio <- (sample$SellUnits/sample$OpenUnits)[k]
ratio <- ifelse(is.na(ratio), 0, ratio)
real <- open*ratio
close <- sample$AiaAdjustAmt[k]+open-real
s24j_open <- c(s24j_open, open)
s24j_real <- c(s24j_real, real)
s24j_close <- c(s24j_close, close)
}
output <- cbind.data.frame(sample, s24j_real, s24j_open)
end.time <- Sys.time()
time.taken2 <- end.time - start.time
time.taken2
"Time difference of 0.3530352 secs"
## (3) Old method
start.time <- Sys.time()
close <- sapply(1:dim(sample)[1], myfct.old)
open <- c(0, close[1:(length(close)-1)])
real <- open*ifelse(is.na(sample$SellUnits/sample$OpenUnits),
0,
sample$SellUnits/sample$OpenUnits)
output <- cbind.data.frame(sample, real, open)
end.time <- Sys.time()
time.taken3 <- end.time - start.time
time.taken3
"Time difference of 48.86089 secs"