我编写了一个函数,用于对91个时间序列模型进行向后逐步(backward stepwise)ARIMA 建模。
该函数有两个输入:
1. 一个由时间序列对象组成的列表
2. 一个由数据框组成的列表
函数如下:
# Backward stepwise elimination of external regressors for one series.
#
# Repeatedly fits a model with auto_arima(), ranks the coefficient table with
# rank_pval(), and drops the worst-ranked regressor until no ranked regressor
# is left.
#
# Args:
#   x: multivariate time series passed to auto_arima().
#   y: data frame with the same columns as x; regressors are dropped from it
#      and x is rebuilt from it after every elimination.
#      NOTE(review): assumes y has the same rows as x -- confirm with caller.
#
# Returns:
#   Unnamed list: [[1]] the last fitted model, [[2]] its ranked coefficient
#   table.
backward_stepwise <- function(x, y) {
  repeat {
    arima_result <- auto_arima(x)

    # Presumably a lone regressor is labelled "xreg" by the fitting routine;
    # map that label back to the name of the last column of x.
    last_col <- colnames(x)[ncol(x)]
    names(arima_result$coef) <- gsub(
      pattern = "xreg", replacement = last_col,
      x = names(arima_result$coef)
    )

    arima_pvals <- p_calc(arima_result)
    arima_outputs <- run_outputs(arima_result, arima_pvals)
    arima_ranked <- rank_pval(arima_outputs)

    # Decide whether to stop BEFORE doing any removal work. Only terms that
    # are real columns of y can be dropped; a ranked term that is not a
    # column could never be removed and would make the loop refit forever.
    droppable <- !is.na(arima_ranked$rank) &
      arima_ranked$term %in% colnames(y)
    if (!any(droppable)) {
      break
    }

    # Lowest rank = largest p-value. which.min() takes the first entry on
    # ties, so tied ranks (e.g. 1.5 / 1.5) no longer end the elimination.
    worst <- which(droppable)[which.min(arima_ranked$rank[droppable])]
    remove_num_one <- arima_ranked$term[worst]

    # Plain indexing instead of subset(): subset() evaluates `select` inside
    # the data, so a column literally named "y" would shadow the argument.
    y <- y[, colnames(y) != remove_num_one, drop = FALSE]

    # as.ts() on a data frame resets the series to start = 1, frequency = 1;
    # carry over the time attributes of x so every refit sees the same
    # seasonality as the first one.
    x <- ts(y, start = stats::start(x), frequency = stats::frequency(x))
  }
  list(arima_result, arima_ranked)
}
其中:
# Fit an ARIMA model to column 2 of x, using columns 3..ncol(x) as external
# regressors, with an exhaustive (non-stepwise, non-approximated) search.
#
# Args:
#   x: multivariate time series / matrix with the response in column 2.
#
# Returns:
#   The fitted model returned by auto.arima().
auto_arima <- function(x) {
  response <- x[, 2]

  # With no regressor columns left, 3:ncol(x) would count DOWN (3, 2) and
  # fail with a subscript error, so fit a plain ARIMA model instead.
  if (ncol(x) < 3) {
    return(auto.arima(response, approximation = FALSE, stepwise = FALSE))
  }

  auto.arima(
    response,
    xreg = x[, 3:ncol(x)],
    approximation = FALSE,
    stepwise = FALSE
  )
}
# Two-sided normal p-values for the coefficients of a fitted model.
#
# Args:
#   x: fitted model object exposing `coef` and `var.coef`.
#
# Returns:
#   (invisibly) broom::tidy() applied to the named vector of p-values,
#   rounded to three decimals.
p_calc <- function(x) {
  std_errors <- sqrt(diag(x$var.coef))
  z_scores <- abs(x$coef) / std_errors
  two_sided <- round(2 * (1 - pnorm(z_scores)), digits = 3)
  invisible(broom::tidy(two_sided))
}
# Combine the coefficients of a fitted model with their p-values.
#
# Args:
#   x: fitted model object exposing `coef`.
#   y: p-value table, as produced by p_calc().
#
# Returns:
#   Table with columns "term", "coef" and "pval".
run_outputs <- function(x, y) {
  combined <- cbind(broom::tidy(x$coef), y)
  # Keep columns 1, 2 and 4; column 3 is presumably the duplicated term
  # label coming from y -- TODO confirm.
  selected <- combined[, c(1, 2, 4)]
  names(selected) <- c("term", "coef", "pval") # naming columns of outputs
  selected
}
# Rank the removable terms of a coefficient table by p-value.
#
# A term is ranked when it is not an ARMA term (ar1, ma2, sar1, sma1, ...),
# not "intercept" or "drift", and its p-value is above 0.2. Rank 1 goes to
# the largest p-value; every other row gets NA.
#
# Args:
#   x: table with columns `term` and `pval`.
#
# Returns:
#   x with an added numeric `rank` column.
rank_pval <- function(x) {
  x$rank <- rep(NA_real_, nrow(x))

  # Match complete ARMA labels only. A two-character prefix test would also
  # exclude regressors such as "market" or "area", and would miss the
  # seasonal "sar"/"sma" terms.
  is_arma <- grepl("^s?(ar|ma)[0-9]+$", x$term)
  is_constant <- x$term %in% c("intercept", "drift")

  # Rows with a missing p-value are never candidates; without this guard the
  # mask contains NA and the assignment below errors.
  cond <- !is_arma & !is_constant & !is.na(x$pval) & x$pval > 0.2

  # ties.method = "first" guarantees exactly one rank-1 row even when the
  # largest p-values are tied (the default averages them to 1.5).
  x[cond, "rank"] <- as.numeric(rank(-x$pval[cond], ties.method = "first"))
  x
}
# Pick the term(s) whose rank equals 1.
#
# Args:
#   x: ranked coefficient table with columns `term` and `rank`.
#
# Returns:
#   (invisibly) the matching entries of x$term; empty when nothing has
#   rank 1. Rows with a missing rank are never selected.
remove_one <- function(x) {
  is_top <- x$rank == 1
  invisible(x$term[is_top & !is.na(is_top)])
}
# Drop the named column(s) from a data frame and convert the rest to a
# time series.
#
# Args:
#   x: data frame.
#   y: character vector of column names to drop (may be empty).
#
# Returns:
#   ts object built from the remaining columns.
cond_select <- function(x, y) {
  # Plain indexing instead of subset(): subset() evaluates `select` inside
  # the data, so columns literally named "x" or "y" would shadow the
  # arguments. %in% also copes with an empty or multi-element y.
  keep <- !(names(x) %in% y)
  ts(x[, keep, drop = FALSE])
}
最快的实现(68.69秒)是通过使用:
# `multiprocess` is deprecated in the future package; `multisession`
# (background R sessions) is what it resolved to on Windows, where forking
# is unavailable.
plan(multisession)
# One backward_stepwise() call per (time series, data frame) pair.
ts_outputs <- future_mapply(
  backward_stepwise, list_ts_actual, list_df_actual,
  SIMPLIFY = FALSE
)
我可以采取哪些进一步的措施?最终,我希望在本地计算机上尽快运行。
我使用的是一台 Dell XPS 15 9560,CPU 为 Intel® Core™ i7-7700HQ(4核,8个逻辑处理器)。
我知道无法通过 mcmapply 进行并行处理,因为 Windows 操作系统不支持进程分叉(fork)。
我尝试使用cmpfun编译实际函数,但没有提高任何速度。
可能性:
我还有其他选择吗?
感谢任何帮助或指导