我想在 15 天周期的 bar(K 线柱)上计算指数移动平均线(EMA),但希望在每天收盘时都能看到这条 15 天 bar EMA 的“演变”。也就是说,我有若干根已完成的 15 天 bar;每天有新数据进来时,我想用新信息重新计算 EMA。实际上,在已完成的 15 天 bar 之后,每天都有一根新的 15 天 bar 在逐步形成;这根尚未完成的 bar 每天的最新状态,都应当与此前已完成的 15 天 bar 一起用于 EMA 计算。
假设我们从 2012-01-01 开始(本例中每个日历日都有数据),到 2012-01-15 收盘时就得到了第一根完整的 15 天 bar。在 2012-03-01,完成 4 根完整的 15 天 bar 之后,我们就可以开始计算 4 根 bar 的 EMA(EMA(x, n = 4))。在 2012-03-02 收盘时,我们利用截至当时的信息计算 2012-03-02 的 EMA,把 2012-03-02 的 OHLC 当作一根正在形成中的 15 天 bar。因此,我们取 4 根完整的 bar 加上 2012-03-02 这根进行中的 bar 来计算 EMA(x, n = 4)。然后再等一天,看看这根正在形成的新 15 天 bar 发生了什么变化(详见下面的函数 to.period.cumulative),并计算 EMA 的新值……接下来的 15 天依此类推……详见下面的函数 EMA.cumulative……
下面是我目前能想到的实现。它的性能(运行速度)无法接受,而由于我的 R 知识有限,我没办法让它跑得更快。
library(quantmod)
# Row-bind every element of `lst` by repeatedly merging neighbouring pairs.
#
# Each pass binds elements (1,2), (3,4), ... so the list length halves until a
# single object remains; an unpaired trailing element is carried over as is.
#
# lst: a list of objects that `rbind()` can combine.
# Returns the combined object, or NULL for an empty list (the same result
# `do.call(rbind, list())` gives) instead of failing on `lst[[1]]`.
do.call.rbind <- function(lst) {
  if (length(lst) == 0L) {
    return(NULL)
  }
  while (length(lst) > 1L) {
    n <- length(lst)
    starts <- seq(from = 1L, to = n, by = 2L)
    lst <- lapply(starts, function(i) {
      # Odd-length list: the last element has no partner in this pass.
      if (i == n) {
        return(lst[[i]])
      }
      rbind(lst[[i]], lst[[i + 1L]])
    })
  }
  lst[[1L]]
}
# Build "cumulative" (in-progress) bars: the input is split into chunks of
# `numPeriods` `period`s and, within each chunk, every row gets
#   column 1: the first value of the chunk's column 1, repeated,
#   column 2: the running maximum of column 2,
#   column 3: the running minimum of column 3,
#   column 4: column 4 unchanged,
#   column 5: the running sum of column 5 (only when the input is OHLCV).
# NOTE(review): this assumes OHLCV()/OHLC() return the columns in
# Open/High/Low/Close(/Volume) order -- confirm against quantmod.
#
# x:          object accepted by quantmod's is.OHLCV()/is.OHLC().
# name:       prefix for the output column names; defaults to the expression
#             passed as `x`.
# period, numPeriods: forwarded to split() as `f` and `k`.
# Returns the row-bound chunks with columns "<name>.Open", "<name>.High", ...
# Stops with an error when `x` is neither OHLCV nor OHLC.
to.period.cumulative <- function(x, name=NULL, period="days", numPeriods=15) {
  # Capture the caller's expression before `x` is reassigned below.
  if (is.null(name)) {
    name <- deparse(substitute(x))
  }
  cnames <- c("Open", "High", "Low", "Close")
  if (has.Vo(x)) {
    cnames <- c(cnames, "Volume")
  }
  cnames <- paste(name, cnames, sep=".")
  if (quantmod:::is.OHLCV(x)) {
    # Qualified like is.OHLCV above, so the call does not depend on OHLCV
    # being exported by the installed quantmod version.
    x <- quantmod:::OHLCV(x)
    out <- do.call.rbind(
      lapply(split(x, f=period, k=numPeriods),
             function(x) cbind(rep(first(x[,1]), NROW(x[,1])),
                               cummax(x[,2]), cummin(x[,3]), x[,4], cumsum(x[,5]))))
  } else if (quantmod:::is.OHLC(x)) {
    x <- OHLC(x)
    out <- do.call.rbind(
      lapply(split(x, f=period, k=numPeriods),
             function(x) cbind(rep(first(x[,1]), NROW(x[,1])),
                               cummax(x[,2]), cummin(x[,3]), x[,4])))
  } else {
    stop("Object does not have OHLC(V).")
  }
  colnames(out) <- cnames
  return(out)
}
# Day-by-day EMA over completed bars plus the bar currently in progress.
#
# cumulativeBars: cumulative bars (as produced by to.period.cumulative).
# nEMA:           `n` passed to EMA().
# period, numPeriods: `on` and `k` for endpoints(), which picks the rows that
#                 close a bar; `period` alone is also used to split the
#                 closes into the chunks that are processed one at a time.
# Returns the per-chunk results row-bound together in a column "EMA<nEMA>".
EMA.cumulative<-function(cumulativeBars, nEMA = 4, period="days", numPeriods=15) {
  fullBarRows <- endpoints(cumulativeBars, on=period, k=numPeriods)
  barsEndptCl <- Cl(cumulativeBars[fullBarRows])

  # EMA for one chunk of closes; this is the slow part, since it re-subsets
  # and re-binds for every chunk.
  emaForChunk <- function(chunkClose) {
    completed <- barsEndptCl[index(barsEndptCl) < last(index(chunkClose)), ]
    # Fewer than nEMA - 1 earlier bar closes: emit NA for this chunk.
    if (NROW(completed) < (nEMA - 1)) {
      return(xts(NA, order.by=index(chunkClose)))
    }
    recent <- last(rbind(completed, chunkClose), n=(nEMA + 1))
    last(EMA(recent, n=nEMA))
  }

  chunks <- split(Cl(cumulativeBars), period)
  outEMA <- do.call.rbind(lapply(chunks, emaForChunk))
  colnames(outEMA) <- paste0("EMA", nEMA)
  outEMA
}
# Reproducible example: fetch SPY, build the cumulative bars and time the
# EMA calculation. The lines below rely on getSymbols() having made `SPY`
# available.
getSymbols("SPY", from="2010-01-01")
# The stray empty argument between `SPY` and `name` has been removed.
SPY.cumulative <- to.period.cumulative(SPY, name="SPY")
system.time(
  SPY.EMA <- EMA.cumulative(SPY.cumulative)
)
在我的系统上需要
user system elapsed
4.708 0.000 4.410
可接受的执行时间不到一秒......是否可以使用纯R来实现这一目标?
本帖与我之前的提问“Optimize moving averages calculation - is it possible?”相关,但那个问题没有得到任何回答。现在我给出了一个可重现的示例,并更详细地说明了我想要加速的内容,希望这个问题现在更容易理解。
有关如何提高速度的任何想法都受到高度赞赏。
答案 0 :(得分:6)
我没有找到只用 R 就能令人满意地解决这个问题的方案,所以我拿起了老工具——C 语言,结果比我想象的还要好。感谢大家“推动”我去使用 Rcpp、inline 等出色的工具,效果惊人。我想,以后每当遇到 R 无法满足的性能要求时,我都会在 R 中加入 C 代码,性能问题也就解决了。下面是针对该性能问题的代码和解决方案。
# How to speedup cumulative EMA calculation
#
###############################################################################
library(quantmod)
library(Rcpp)
library(inline)
library(rbenchmark)
# Combine all elements of `lst` with rbind(), merging adjacent pairs in
# rounds so that the list shrinks by half on each pass.
#
# lst: a list of objects that `rbind()` can combine.
# Returns the single combined object. An empty list yields NULL (matching
# `do.call(rbind, list())`) rather than a "subscript out of bounds" error.
do.call.rbind <- function(lst) {
  remaining <- length(lst)
  if (remaining == 0L) {
    return(NULL)
  }
  while (remaining > 1L) {
    lst <- lapply(seq(from = 1L, to = remaining, by = 2L), function(left) {
      # The final element of an odd-length list is passed through unpaired.
      if (left == remaining) {
        lst[[left]]
      } else {
        rbind(lst[[left]], lst[[left + 1L]])
      }
    })
    remaining <- length(lst)
  }
  lst[[1L]]
}
# Turn OHLC(V) data into cumulative bars: rows are grouped into chunks of
# `numPeriods` `period`s via split(), and inside each chunk column 1 is held
# at the chunk's first value, column 2 becomes a running maximum, column 3 a
# running minimum, column 4 is kept, and (OHLCV only) column 5 becomes a
# running sum.
#
# x:    object accepted by quantmod's is.OHLCV()/is.OHLC().
# name: column-name prefix; defaults to the expression supplied for `x`.
# Returns the chunks row-bound together, named "<name>.Open", ... ;
# raises an error when `x` is neither OHLCV nor OHLC.
to.period.cumulative <- function(x, name=NULL, period="days", numPeriods=15) {
  # Resolve the default name first, while `x` still refers to the argument.
  if (is.null(name)) {
    name <- deparse(substitute(x))
  }
  fields <- c("Open", "High", "Low", "Close")
  if (has.Vo(x)) {
    fields <- c(fields, "Volume")
  }
  cnames <- paste(name, fields, sep=".")

  if (quantmod:::is.OHLCV(x)) {
    x <- quantmod:::OHLCV(x)
    runningBar <- function(chunk) {
      cbind(rep(first(chunk[,1]), NROW(chunk[,1])),
            cummax(chunk[,2]), cummin(chunk[,3]), chunk[,4], cumsum(chunk[,5]))
    }
  } else if (quantmod:::is.OHLC(x)) {
    x <- OHLC(x)
    runningBar <- function(chunk) {
      cbind(rep(first(chunk[,1]), NROW(chunk[,1])),
            cummax(chunk[,2]), cummin(chunk[,3]), chunk[,4])
    }
  } else {
    stop("Object does not have OHLC(V).")
  }

  chunks <- split(x, f=period, k=numPeriods)
  out <- do.call.rbind(lapply(chunks, runningBar))
  colnames(out) <- cnames
  out
}
# Reference (slow) implementation: for every `period` chunk of closes, take
# the closes of the bars that ended strictly before the chunk, append the
# chunk, keep the last nEMA + 1 rows, and report the last value of
# EMA(..., n = nEMA).
#
# cumulativeBars: cumulative bars (as produced by to.period.cumulative).
# nEMA:           `n` passed to EMA().
# period, numPeriods: `on` / `k` for endpoints(); `period` also drives split().
# Returns the row-bound per-chunk values in a column named "EMA<nEMA>".
EMA.cumulative<-function(cumulativeBars, nEMA = 4, period="days", numPeriods=15) {
  barEnds <- endpoints(cumulativeBars, on=period, k=numPeriods)
  barsEndptCl <- Cl(cumulativeBars[barEnds])

  perChunk <- lapply(split(Cl(cumulativeBars), period), function(closes) {
    earlierBars <- barsEndptCl[index(barsEndptCl) < last(index(closes)), ]
    enoughHistory <- NROW(earlierBars) >= (nEMA - 1)
    if (enoughHistory) {
      tailRows <- last(rbind(earlierBars, closes), n=(nEMA + 1))
      last(EMA(tailRows, n=nEMA))
    } else {
      # Too few earlier bar closes: emit NA for this chunk.
      xts(NA, order.by=index(closes))
    }
  })

  outEMA <- do.call.rbind(perChunk)
  colnames(outEMA) <- paste0("EMA", nEMA)
  outEMA
}
# C body for the cumulative EMA, compiled below with inline::cfunction().
#
# Arguments (see the signature passed to cfunction):
#   cumbars - closes of the cumulative bars (coerced to double if needed)
#   nperiod - number of rows per bar; the code treats every row i with
#             i %% nperiod == nperiod - 1 as the last row of a completed bar
#   n       - number of completed bars used for the seed mean
#   ratio   - EMA smoothing ratio
# Returns a double vector with one value per input row; rows before the seed
# position are NA.
#
# Fixes relative to the first version:
#   * inputs with fewer than n * nperiod usable rows no longer read or write
#     past the end of the buffers; the affected rows are returned as NA;
#   * the look-back to the previous result is i % nperiod + 1, so every row
#     of an unfinished bar builds on the last COMPLETED bar (it used to point
#     at the first row of the unfinished bar);
#   * row 0 is no longer excluded from the seed mean, which was wrong when
#     nperiod == 1.
# NOTE(review): as before, only leading NAs are handled, and leading NAs
# shift the seed position without re-aligning the bar boundaries.
# The C text must not contain single quotes: it lives in a single-quoted
# R string.
EMA.c.c.code <- '
/* Loop and PROTECT counters */
int i, P=0;
/* Ensure that cumbars is double */
if(TYPEOF(cumbars) != REALSXP) {
  PROTECT(cumbars = coerceVector(cumbars, REALSXP)); P++;
}
/* Pointers to / values of function arguments */
double *d_cumbars = REAL(cumbars);
int i_nper = asInteger(nperiod);
int i_n = asInteger(n);
double d_ratio = asReal(ratio);
if(i_nper < 1 || i_n < 1) {
  error("nperiod and n must be positive integers");
}
/* Input object length */
int nr = nrows(cumbars);
/* Initialize result R object */
SEXP result;
PROTECT(result = allocVector(REALSXP,nr)); P++;
double *d_result = REAL(result);
/* Position of the seed value: last row of the n-th completed bar */
int beg = i_n*i_nper - 1;
/* Not enough rows to seed the EMA: return all NA */
if(beg >= nr) {
  for(i = 0; i < nr; i++) {
    d_result[i] = NA_REAL;
  }
  UNPROTECT(P);
  return(result);
}
d_result[beg] = 0;
for(i = 0; i <= beg; i++) {
  /* Account for leading NAs in input */
  if(ISNA(d_cumbars[i])) {
    d_result[i] = NA_REAL;
    beg++;
    /* Ran out of rows while skipping NAs: the rest is NA as well */
    if(beg >= nr) {
      for(; i < nr; i++) {
        d_result[i] = NA_REAL;
      }
      UNPROTECT(P);
      return(result);
    }
    d_result[beg] = 0;
    continue;
  }
  /* Set leading NAs in output */
  if(i < beg) {
    d_result[i] = NA_REAL;
  }
  /* Raw mean to start EMA - but only on full bars */
  if(i%i_nper == (i_nper - 1)) {
    d_result[beg] += d_cumbars[i] / i_n;
  }
}
/* Loop over the remaining input values */
int i_lookback = 0;
for(i = beg+1; i < nr; i++) {
  /* Distance back to the last row of the previous completed bar */
  i_lookback = i%i_nper + 1;
  /* Previous result should be based only on full bars */
  d_result[i] = d_cumbars[i] * d_ratio + d_result[i-i_lookback] * (1-d_ratio);
}
/* UNPROTECT R objects and return result */
UNPROTECT(P);
return(result);
'
EMA.c.c <- cfunction(signature(cumbars="numeric", nperiod="numeric", n="numeric", ratio="numeric"), EMA.c.c.code)
# Wrapper around the compiled EMA.c.c routine.
#
# cumulativeBars: cumulative bars (as produced by to.period.cumulative).
# nEMA:           passed to EMA.c.c as `n`; also sets the ratio 2 / (nEMA + 1).
# period:         not used by this implementation.
# numPeriods:     passed to EMA.c.c as `nperiod`.
# Returns the routine's output reclass()ed against the close series, in a
# column named "EMA<nEMA>".
EMA.cumulative.c<-function(cumulativeBars, nEMA = 4, period="days", numPeriods=15) {
  closes <- Cl(cumulativeBars)
  smoothing <- 2/(nEMA+1)
  outEMA <- reclass(
    EMA.c.c(cumbars=closes, nperiod=numPeriods, n=nEMA, ratio=smoothing),
    closes
  )
  colnames(outEMA) <- paste0("EMA", nEMA)
  outEMA
}
# Fetch SPY and build the cumulative bars used by both implementations.
getSymbols("SPY", from = "2010-01-01")
SPY.cumulative <- to.period.cumulative(SPY, name = "SPY")

# Single timed run of each implementation; the results are kept in
# SPY.EMA and SPY.EMA.c.
system.time(SPY.EMA <- EMA.cumulative(SPY.cumulative))
system.time(SPY.EMA.c <- EMA.cumulative.c(SPY.cumulative))

# Side-by-side benchmark, 10 replications each, sorted by relative time.
res <- benchmark(
  EMA.cumulative(SPY.cumulative),
  EMA.cumulative.c(SPY.cumulative),
  columns = c("test", "replications", "elapsed", "relative", "user.self", "sys.self"),
  order = "relative",
  replications = 10
)
print(res)
编辑:为了说明相对于我那段笨拙的 R 代码(我相信它还能写得更好,因为实际上我写成了双重循环)的性能提升,下面是打印输出:
> print(res)
test replications elapsed relative user.self
2 EMA.cumulative.c(SPY.cumulative) 10 0.026 1.000 0.024
1 EMA.cumulative(SPY.cumulative) 10 57.732 2220.462 56.755
因此,按我的标准,这是科幻(SF)级别的改进……